I can't see multiple subscriptions being an issue as long as you're handling them properly. I've occasionally seen that disconnect/reconnect cycle going on after a new upload. If it does it more than 4-5 times, I pull the power, and it seems to behave once it powers up again. It still responds to OTA programming whilst doing that, and re-flashing did calm it down one time.
Yes, you do need the loop() function, and it should have at least a delay(0); in it, so that the Particle cloud code can run and update - preferably a longer delay though, maybe 100 (milliseconds). As for your myHandler function, if ( (strcmp(data, "on") == 0)){digitalWrite(led, HIGH);} is correct with ==0: strcmp returns 0 when the two strings are equal, so changing it to ==1 would mean the LED never turns on.
You could also craft the code so that the subscribe handler just grabs the data and sets a flag, and the main loop function processes and uses it - like the code I'm using below to run a TFT display. It grabs the data, sets a flag, and on the next run of loop, updates the display. It's a bit of a kludge ATM, but it works... IT'S ALIVE!
btw, I have run into a bug in the Particle publish / subscribe model that is known but apparently not fixed yet. When you subscribe to an event, even if you specify your device ID to try and only get private events, it can and will also get public events if the event name is not unique. (I was scratching my head to work out why my temperature was updating faster than every 30 seconds, and flipping between 28C (true temp) and 21C (would like, but not true temp!).) Setting my publish event to PRIVATE, and changing the subscribe to restrict to MY_DEVICES instead of a specific one, seems to have calmed things down.

Display / receive code:
#include <stdlib.h>
#include <string.h>
#include <Adafruit_ILI9341.h>
#include <Adafruit_GFX.h>
#include <SPI.h>
// Control-line pin numbers handed to the display driver
// (the original author notes these are the Adafruit shield defaults).
#define TFT_DC 1
#define TFT_CS 6
// Only chip-select and data/command are passed here; the data lines are left
// to the hardware SPI peripheral.
Adafruit_ILI9341 tft(TFT_CS, TFT_DC);
// State shared between the subscribe handler and loop().
bool updateDisplay = false;  // set when a new reading is waiting to be drawn
bool firstRun = true;        // true until loop() has drawn the splash screen once
char buffer[8] = " ";        // most recent payload text, truncated to fit
float tempC = 0.0;           // most recent reading parsed from buffer

/**
 * Subscription callback: stores the latest payload and flags it for display.
 *
 * The payload is copied into `buffer` with an explicit bound so that an
 * unexpectedly long event body (for example, from another publisher using
 * the same event name) cannot write past the end of the array. The copy is
 * always NUL-terminated before being parsed.
 *
 * @param event  Name of the event that fired (unused).
 * @param data   Payload text, or a null pointer when the event has no body.
 *               A null payload clears any pending update and resets tempC.
 */
void myHandler(const char *event, const char *data)
{
    (void)event;

    if (data)
    {
        // strncpy does not terminate when the source fills the destination,
        // so reserve the last byte and terminate it ourselves.
        strncpy(buffer, data, sizeof(buffer) - 1);
        buffer[sizeof(buffer) - 1] = '\0';
        tempC = atof(buffer);
        updateDisplay = true;
    }
    else
    {
        updateDisplay = false;
        tempC = 0.0;
    }
}
// One-time initialisation for the display sketch: bring up the panel, pick
// its orientation, then register myHandler for "tempC" events.
void setup(void)
{
    const int rotation = 1;  // orientation index handed to setRotation()

    tft.begin();
    tft.setRotation(rotation);

    Particle.subscribe("tempC", myHandler, MY_DEVICES);
}
// Blanks the panel and prints the green, size-3 "Remote / Temperature"
// heading followed by an empty line, leaving the cursor below it.
static void drawHeading()
{
    tft.fillScreen(ILI9341_BLACK);
    tft.setCursor(0, 0);
    tft.setTextColor(ILI9341_GREEN);
    tft.setTextSize(3);
    tft.println("Remote");
    tft.println("Temperature");
    tft.println();
}

// Display sketch main loop: draws a "waiting" splash on the first pass, then
// redraws the reading whenever the subscribe handler has flagged a new value.
void loop(void)
{
    if (firstRun)
    {
        drawHeading();
        tft.setTextColor(ILI9341_YELLOW);
        tft.setTextSize(3);
        tft.println("Waiting for");
        tft.println("update... ");
        tft.println();
        firstRun = false;
    }

    if (updateDisplay)
    {
        drawHeading();
        tft.setTextColor(ILI9341_YELLOW);
        tft.setTextSize(5);
        tft.println(tempC);
        // Consume the flag so the screen is only repainted on fresh data.
        updateDisplay = false;
    }

    delay(1000);
}
Sensor / transmit code:
#include <OneWire.h>
// OneWire DS18S20, DS18B20, DS1822 Temperature Example
//
// http://www.pjrc.com/teensy/td_libs_OneWire.html
//
// The DallasTemperature library can do all this work for you!
// http://milesburton.com/Dallas_Temperature_Control_Library
OneWire ds(2); // on pin 2 (a 4.7K resistor is necessary)
// Text form of the reading passed to Particle.publish(). The value is an
// int16_t divided by 16, so the widest two-decimal rendering is "-2048.00"
// (8 characters plus the terminator); 16 bytes leaves headroom.
char tmpStr[16];
// One-time initialisation for the sensor sketch: opens the serial port that
// loop() uses for its diagnostic output.
void setup()
{
    //Particle.publish("DS18x20_Temperature");
    const unsigned long baud = 9600;
    Serial.begin(baud);
}
void loop(void) {
byte i;
byte present = 0;
byte type_s;
byte data[12];
byte addr[8];
float celsius, fahrenheit;
if ( !ds.search(addr)) {
Serial.println("No more addresses.");
Serial.println();
ds.reset_search();
delay(250);
return;
}
Serial.print("ROM =");
for( i = 0; i < 8; i++) {
Serial.write(' ');
Serial.print(addr[i], HEX);
}
if (OneWire::crc8(addr, 7) != addr[7]) {
Serial.println("CRC is not valid!");
return;
}
Serial.println();
// the first ROM byte indicates which chip
switch (addr[0]) {
case 0x10:
Serial.println(" Chip = DS18S20"); // or old DS1820
type_s = 1;
break;
case 0x28:
Serial.println(" Chip = DS18B20");
type_s = 0;
break;
case 0x22:
Serial.println(" Chip = DS1822");
type_s = 0;
break;
default:
Serial.println("Device is not a DS18x20 family device.");
return;
}
ds.reset();
ds.select(addr);
ds.write(0x44, 1); // start conversion, with parasite power on at the end
delay(1000); // maybe 750ms is enough, maybe not
// we might do a ds.depower() here, but the reset will take care of it.
present = ds.reset();
ds.select(addr);
ds.write(0xBE); // Read Scratchpad
Serial.print(" Data = ");
Serial.print(present, HEX);
Serial.print(" ");
for ( i = 0; i < 9; i++) { // we need 9 bytes
data[i] = ds.read();
Serial.print(data[i], HEX);
Serial.print(" ");
}
Serial.print(" CRC=");
Serial.print(OneWire::crc8(data, 8), HEX);
Serial.println();
// Convert the data to actual temperature
// because the result is a 16 bit signed integer, it should
// be stored to an "int16_t" type, which is always 16 bits
// even when compiled on a 32 bit processor.
int16_t raw = (data[1] << 8) | data[0];
if (type_s) {
raw = raw << 3; // 9 bit resolution default
if (data[7] == 0x10) {
// "count remain" gives full 12 bit resolution
raw = (raw & 0xFFF0) + 12 - data[6];
}
} else {
byte cfg = (data[4] & 0x60);
// at lower res, the low bits are undefined, so let's zero them
if (cfg == 0x00) raw = raw & ~7; // 9 bit resolution, 93.75 ms
else if (cfg == 0x20) raw = raw & ~3; // 10 bit res, 187.5 ms
else if (cfg == 0x40) raw = raw & ~1; // 11 bit res, 375 ms
//// default is 12 bit resolution, 750 ms conversion time
}
celsius = (float)raw / 16.0;
fahrenheit = celsius * 1.8 + 32.0;
//push to Particle
dtostrf(celsius, 5, 2, tmpStr);
Particle.publish("tempC",tmpStr,30,PRIVATE);
Serial.print(" Temperature = ");
Serial.print(celsius);
Serial.print(" Celsius, ");
Serial.print(fahrenheit);
Serial.println(" Fahrenheit");
delay(30000);
}