From 7a008ae6731ac69e7e08ccfa517c8a1a02db9803 Mon Sep 17 00:00:00 2001
From: Robert Plummer
Date: Sun, 28 Aug 2016 21:52:18 -0400
Subject: [PATCH 1/5] switch to a much more basic math example

---
 character_demo.html | 2278 ++++++++-----------------------------------
 1 file changed, 427 insertions(+), 1851 deletions(-)

diff --git a/character_demo.html b/character_demo.html
index 80a61a0..f3dc50f 100644
--- a/character_demo.html
+++ b/character_demo.html
@@ -77,424 +77,6 @@
-
-
@@ -519,1439 +101,7 @@

Deep Recurrent Nets character generation demo

Input sentences:
-
+
@@ -2022,6 +172,432 @@

Deep Recurrent Nets character generation demo

+

From 80dee368548653132706e322fee6c95d8b1829ea Mon Sep 17 00:00:00 2001
From: Robert Plummer
Date: Sun, 28 Aug 2016 21:55:10 -0400
Subject: [PATCH 2/5] add math demo

---
 character_demo.html | 2278 +++++++++++++++++++++++++++++++++++--------
 math_demo.html      |  603 ++++++++++++
 2 files changed, 2454 insertions(+), 427 deletions(-)
 create mode 100644 math_demo.html

diff --git a/character_demo.html b/character_demo.html
index f3dc50f..80a61a0 100644
--- a/character_demo.html
+++ b/character_demo.html
@@ -77,6 +77,424 @@
+
+
@@ -101,7 +519,1439 @@

Deep Recurrent Nets character generation demo

Input sentences:
-
+
@@ -172,432 +2022,6 @@

Deep Recurrent Nets character generation demo

-
diff --git a/math_demo.html b/math_demo.html
new file mode 100644
index 0000000..f3dc50f
--- /dev/null
+++ b/math_demo.html
@@ -0,0 +1,603 @@
+
+
+RecurrentJS Sentence Memorization Demo
+
+
+
+
+
+
+
+
+
+
+
+Fork me on GitHub
+
+
+

Deep Recurrent Nets character generation demo

+
+ This demo shows usage of the recurrentjs library, which allows you to train deep Recurrent Neural Networks (RNN) and Long Short-Term Memory networks (LSTM) in JavaScript. The core of the library is more general, however, and allows you to set up arbitrary expression graphs that support fully automatic backpropagation.

+ + In this demo we take a dataset of sentences as input and learn to memorize the sentences character by character. That is, the RNN/LSTM takes a character together with its context from previous time steps (as mediated by the hidden layers) and predicts the next character in the sequence. Here is an example:

+ +
+ + In the example image above, which depicts a deep RNN, every character has an associated "letter vector" that we train with backpropagation. These letter vectors are combined through a (learnable) matrix-vector multiplication into the first hidden layer representation (yellow), then into the second hidden layer representation (purple), and finally into the output space (blue). The output space has dimensionality equal to the number of characters in the dataset, and every dimension provides the probability of the next character in the sequence. The network is therefore trained to always predict the next character (using a Softmax + cross-entropy loss over all letters). The quantity we track during training is called the perplexity, which measures how surprised the network is to see the next character in a sequence. For example, if the perplexity is 4.0, it is as if the network were guessing uniformly at random from 4 possible characters for the next letter (i.e. the lowest it can be is 1). At test time, the prediction is currently done iteratively, character by character, in a greedy fashion, but I might eventually implement more sophisticated methods (e.g. beam search).
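The perplexity tracked above can be sketched in a few lines of plain JavaScript. The `perplexity` helper below is illustrative only (it is not part of the recurrentjs API): it takes the probability the model assigned to each observed next character and exponentiates the mean cross-entropy.

```javascript
// Illustrative helper, not part of recurrentjs: perplexity from the
// probabilities the model assigned to each observed next character.
// A uniform guess over 4 characters gives perplexity 4; a perfectly
// confident correct model gives the minimum value, 1.
function perplexity(probs) {
  var crossEntropy = 0; // mean negative log-probability, in nats
  for (var i = 0; i < probs.length; i++) {
    crossEntropy += -Math.log(probs[i]);
  }
  crossEntropy /= probs.length;
  return Math.exp(crossEntropy);
}

console.log(perplexity([0.25, 0.25, 0.25, 0.25])); // ≈ 4, the "uniform over 4" case
console.log(perplexity([1, 1, 1]));                // 1, the lower bound
```

This is why a plummeting perplexity during training means the network is becoming less surprised by the data.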

+ + The demo is pre-filled with sentences from Paul Graham's essays, in an attempt to encode Paul Graham's knowledge into the weights of the Recurrent Networks. The long-term goal of the project, then, is to generate startup wisdom at will. Feel free to train on whatever data you wish, and to experiment with the parameters. If you want more impressive models, you have to increase the sizes of the hidden layers, and perhaps slightly increase the size of the letter vectors. However, this will take longer to train.

+ + For suggestions/bugs ping me at @karpathy.

+ +
+
+
Input sentences:
+ +
+
+ +
Controls/Options:
+ + + + +
+ protip: if your perplexity is exploding to Infinity, try lowering the initial learning rate +
+
+ +
+
Training stats:
+
+
Learning rate: you want to anneal this over time if you're training for a longer time.
+
+
+
+ + +
+
+
+
+ +
+
+ +
Model samples:
+
+
+
Softmax sample temperature: a lower setting will generate more likely predictions, but you'll see more of the same common words again and again. A higher setting will generate less frequent words, but you might see more spelling errors.
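The temperature setting above rescales the network's output scores before the softmax. Here is a minimal sketch of temperature-based sampling in plain JavaScript; `sampleWithTemperature` and the raw-score ("logit") input are illustrative assumptions, not the demo's actual code:

```javascript
// Illustrative sketch, not the demo's code: sample an output index after
// dividing the raw scores by the temperature. T = 1 keeps the model's
// distribution; T < 1 sharpens it toward the argmax; T > 1 flattens it.
function sampleWithTemperature(logits, temperature) {
  var max = Math.max.apply(null, logits); // subtract max for numerical stability
  var exps = [];
  var sum = 0;
  for (var i = 0; i < logits.length; i++) {
    var e = Math.exp((logits[i] - max) / temperature);
    exps.push(e);
    sum += e;
  }
  // roulette-wheel selection over the (unnormalized) softmax weights
  var r = Math.random() * sum;
  for (var j = 0; j < exps.length; j++) {
    r -= exps[j];
    if (r <= 0) return j;
  }
  return exps.length - 1; // guard against floating-point leftovers
}
```

With a very low temperature this behaves almost exactly like greedy argmax prediction; with a very high one it approaches uniform sampling over all characters.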
+
+
+
+
+
+
Greedy argmax prediction:
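The greedy prediction shown in this panel is just an argmax over the output probabilities at each step. A tiny illustrative helper (not the demo's actual code):

```javascript
// Illustrative: greedy prediction picks the index of the largest
// probability at every step instead of sampling from the distribution.
function argmax(probs) {
  var best = 0;
  for (var i = 1; i < probs.length; i++) {
    if (probs[i] > probs[best]) best = i;
  }
  return best;
}

console.log(argmax([0.1, 0.7, 0.2])); // → 1
```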
+
+
+
+
I/O save/load model JSON
+ + + +
+ You can save or load models with JSON using the textarea below. +
+ + +
+
Pretrained model:
+ You can also choose to load an example pretrained model with the button below to see what the predictions look like in later stages. The pretrained model is an LSTM with one layer of 100 units, trained for ~10 hours. After clicking the button below you should see the perplexity plummet to about 3.0 and the predictions become better.
+ + +
+
+
+
+

From fc195616529c2a9dc909e0fa39299bf70b81bf25 Mon Sep 17 00:00:00 2001
From: Robert Plummer
Date: Sun, 28 Aug 2016 21:58:01 -0400
Subject: [PATCH 3/5] add math demo

---
 math_demo.html | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/math_demo.html b/math_demo.html
index f3dc50f..512107c 100644
--- a/math_demo.html
+++ b/math_demo.html
@@ -1,6 +1,6 @@
-RecurrentJS Sentence Memorization Demo
+RecurrentJS Math Demo
+
+
+
+
+
+
+
+
+
+
+Fork me on GitHub
+
+
+

Deep Recurrent Nets math demo

+
+ This demo shows usage of the recurrentjs library, which allows you to train deep Recurrent Neural Networks (RNN) and Long Short-Term Memory networks (LSTM) in JavaScript. The core of the library is more general, however, and allows you to set up arbitrary expression graphs that support fully automatic backpropagation.

+ + In this demo we take a dataset of randomly generated math expressions as input and learn to memorize the math logic character by character. That is, the RNN/LSTM takes a character together with its context from previous time steps (as mediated by the hidden layers) and predicts the next character in the sequence. Here is an example:

+ +
+ + In the example image above, which depicts a deep RNN, every character has an associated "letter vector" that we train with backpropagation. These letter vectors are combined through a (learnable) matrix-vector multiplication into the first hidden layer representation (yellow), then into the second hidden layer representation (purple), and finally into the output space (blue). The output space has dimensionality equal to the number of characters in the dataset, and every dimension provides the probability of the next character in the sequence. The network is therefore trained to always predict the next character (using a Softmax + cross-entropy loss over all letters). The quantity we track during training is called the perplexity, which measures how surprised the network is to see the next character in a sequence. For example, if the perplexity is 4.0, it is as if the network were guessing uniformly at random from 4 possible characters for the next letter (i.e. the lowest it can be is 1). At test time, the prediction is currently done iteratively, character by character, in a greedy fashion, but I might eventually implement more sophisticated methods (e.g. beam search).

+ + The demo is populated with random math expressions generated in JavaScript.
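A generator for the random math data mentioned above could look like the following sketch. It assumes single-digit addition rendered as `a+b=c` lines; the demo's actual generator may differ:

```javascript
// Illustrative sketch: produce training "sentences" of simple arithmetic,
// one expression per line, e.g. "3+4=7". The network then learns to
// predict these character sequences, including the digits after '='.
function generateMathData(count) {
  var lines = [];
  for (var i = 0; i < count; i++) {
    var a = Math.floor(Math.random() * 10);
    var b = Math.floor(Math.random() * 10);
    lines.push(a + '+' + b + '=' + (a + b));
  }
  return lines.join('\n');
}
```

Feeding the result into the input textarea gives the network a small, regular "language" whose rules (carrying, the position of `=`) it has to pick up character by character.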

+ + For suggestions/bugs ping me at @karpathy.

+ +
+
+
Input sentences:
+ +
+
+ +
Controls/Options:
+ + + + +
+ protip: if your perplexity is exploding to Infinity, try lowering the initial learning rate +
+
+ +
+
Training stats:
+
+
Learning rate: you want to anneal this over time if you're training for a longer time.
+
+
+
+ + +
+
+
+
+ +
+
+ +
Model samples:
+
+
+
Softmax sample temperature: a lower setting will generate more likely predictions, but you'll see more of the same common words again and again. A higher setting will generate less frequent words, but you might see more spelling errors.
+
+
+
+
+
+
Greedy argmax prediction:
+
+
+
+
I/O save/load model JSON
+ + + +
+ You can save or load models with JSON using the textarea below. +
+ + +
+
Pretrained model:
+ You can also choose to load an example pretrained model with the button below to see what the predictions look like in later stages. The pretrained model is an LSTM with one layer of 100 units, trained for ~10 hours. After clicking the button below you should see the perplexity plummet to about 3.0 and the predictions become better.
+ + +
+
+
+
+

From 7ac113b89315ed1acd1fb06439aefde88b60fc96 Mon Sep 17 00:00:00 2001
From: Robert Plummer
Date: Fri, 4 Nov 2016 11:42:50 -0400
Subject: [PATCH 5/5] added xor demo and
 https://github.com/harthur-org/rnn-viewer

---
 math_demo.html   |  85 +++++++++++++++-
 rnn-viewer.js    | 245 +++++++++++++++++++++++++++++++++++++++++++++++
 src/recurrent.js |   1 -
 xor_demo.html    |  82 ++++++++++++++++
 4 files changed, 411 insertions(+), 2 deletions(-)
 create mode 100644 rnn-viewer.js

diff --git a/math_demo.html b/math_demo.html
index 21c25b0..6742f1b 100644
--- a/math_demo.html
+++ b/math_demo.html
@@ -77,9 +77,13 @@
+
+
+
+
Fork me on GitHub
@@ -113,7 +117,7 @@

Deep Recurrent Nets math demo