mirror of
https://github.com/clearml/clearml
synced 2025-04-10 15:35:51 +00:00
Update examples
This commit is contained in:
parent
ef73bc258f
commit
8acb236b33
@ -4,10 +4,14 @@ import subprocess
|
|||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
import socket
|
import socket
|
||||||
from tempfile import mkstemp
|
from tempfile import mkstemp
|
||||||
|
# make sure we have jupyter in the auto requirements
|
||||||
import jupyter
|
import jupyter
|
||||||
from trains import Task
|
from trains import Task
|
||||||
|
|
||||||
|
|
||||||
|
# set default docker image, with network configuration
|
||||||
|
os.environ['TRAINS_DOCKER_IMAGE'] = 'nvidia/cuda --network host'
|
||||||
|
|
||||||
# initialize TRAINS
|
# initialize TRAINS
|
||||||
task = Task.init(project_name='examples', task_name='Remote Jupyter NoteBook')
|
task = Task.init(project_name='examples', task_name='Remote Jupyter NoteBook')
|
||||||
|
|
||||||
@ -21,33 +25,45 @@ for key in os.environ:
|
|||||||
if key.startswith('TRAINS') and key not in preserve:
|
if key.startswith('TRAINS') and key not in preserve:
|
||||||
env.pop(key, None)
|
env.pop(key, None)
|
||||||
|
|
||||||
|
# Add jupyter server base folder
|
||||||
|
param = {
|
||||||
|
'jupyter_server_base_directory': '',
|
||||||
|
}
|
||||||
|
task.connect(param)
|
||||||
|
|
||||||
# execute jupyter notebook
|
# execute jupyter notebook
|
||||||
fd, local_filename = mkstemp()
|
fd, local_filename = mkstemp()
|
||||||
print('Running Jupyter Notebook Server on {} [{}]'.format(socket.gethostname(), socket.gethostbyname(socket.gethostname())))
|
cwd = os.path.expandvars(os.path.expanduser(param['jupyter_server_base_directory'])) \
|
||||||
process = subprocess.Popen([sys.executable, '-m', 'jupyter', 'notebook'], env=env, stdout=fd, stderr=fd)
|
if param['jupyter_server_base_directory'] else os.getcwd()
|
||||||
|
print('Running Jupyter Notebook Server on {} [{}] at {}'.format(socket.gethostname(),
|
||||||
|
socket.gethostbyname(socket.gethostname()), cwd))
|
||||||
|
process = subprocess.Popen([sys.executable, '-m', 'jupyter', 'notebook', '--no-browser', '--allow-root'],
|
||||||
|
env=env, stdout=fd, stderr=fd, cwd=cwd)
|
||||||
|
|
||||||
# print stdout/stderr
|
# print stdout/stderr
|
||||||
prev_line_count = 0
|
prev_line_count = 0
|
||||||
while True:
|
process_running = True
|
||||||
|
while process_running:
|
||||||
|
process_running = False
|
||||||
try:
|
try:
|
||||||
process.wait(timeout=2.0 if prev_line_count == 0 else 15.0)
|
process.wait(timeout=2.0 if prev_line_count == 0 else 15.0)
|
||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
with open(local_filename, "rt") as f:
|
process_running = True
|
||||||
# read new lines
|
|
||||||
new_lines = f.readlines()
|
|
||||||
if not new_lines:
|
|
||||||
continue
|
|
||||||
output = ''.join(new_lines)
|
|
||||||
print(output)
|
|
||||||
# update task comment with jupyter notebook server links
|
|
||||||
if prev_line_count == 0:
|
|
||||||
task.comment += '\n' + ''.join(line for line in new_lines if 'http://' in line or 'https://' in line)
|
|
||||||
prev_line_count += len(new_lines)
|
|
||||||
|
|
||||||
os.lseek(fd, 0, 0)
|
with open(local_filename, "rt") as f:
|
||||||
os.ftruncate(fd, 0)
|
# read new lines
|
||||||
continue
|
new_lines = f.readlines()
|
||||||
break
|
if not new_lines:
|
||||||
|
continue
|
||||||
|
output = ''.join(new_lines)
|
||||||
|
print(output)
|
||||||
|
# update task comment with jupyter notebook server links
|
||||||
|
if prev_line_count == 0:
|
||||||
|
task.comment += '\n' + ''.join(line for line in new_lines if 'http://' in line or 'https://' in line)
|
||||||
|
prev_line_count += len(new_lines)
|
||||||
|
|
||||||
|
os.lseek(fd, 0, 0)
|
||||||
|
os.ftruncate(fd, 0)
|
||||||
|
|
||||||
# cleanup
|
# cleanup
|
||||||
os.close(fd)
|
os.close(fd)
|
||||||
|
@ -151,7 +151,7 @@ def train():
|
|||||||
def feed_dict(train):
|
def feed_dict(train):
|
||||||
"""Make a TensorFlow feed_dict: maps data onto Tensor placeholders."""
|
"""Make a TensorFlow feed_dict: maps data onto Tensor placeholders."""
|
||||||
if train or FLAGS.fake_data:
|
if train or FLAGS.fake_data:
|
||||||
xs, ys = mnist.train.next_batch(100, fake_data=FLAGS.fake_data)
|
xs, ys = mnist.train.next_batch(FLAGS.batch_size, fake_data=FLAGS.fake_data)
|
||||||
k = FLAGS.dropout
|
k = FLAGS.dropout
|
||||||
else:
|
else:
|
||||||
xs, ys = mnist.test.images, mnist.test.labels
|
xs, ys = mnist.test.images, mnist.test.labels
|
||||||
@ -165,7 +165,7 @@ def train():
|
|||||||
test_writer.add_summary(summary, i)
|
test_writer.add_summary(summary, i)
|
||||||
print('Accuracy at step %s: %s' % (i, acc))
|
print('Accuracy at step %s: %s' % (i, acc))
|
||||||
else: # Record train set summaries, and train
|
else: # Record train set summaries, and train
|
||||||
if i % 100 == 99: # Record execution stats
|
if i % FLAGS.batch_size == FLAGS.batch_size - 1: # Record execution stats
|
||||||
run_metadata = tf.RunMetadata()
|
run_metadata = tf.RunMetadata()
|
||||||
summary, _ = sess.run([merged, train_step],
|
summary, _ = sess.run([merged, train_step],
|
||||||
feed_dict=feed_dict(True),
|
feed_dict=feed_dict(True),
|
||||||
@ -213,5 +213,7 @@ if __name__ == '__main__':
|
|||||||
help='Summaries log directory')
|
help='Summaries log directory')
|
||||||
parser.add_argument('--save_path', default=os.path.join(tempfile.gettempdir(), "model.ckpt"),
|
parser.add_argument('--save_path', default=os.path.join(tempfile.gettempdir(), "model.ckpt"),
|
||||||
help='Save the trained model under this path')
|
help='Save the trained model under this path')
|
||||||
|
parser.add_argument('--batch_size', default=100,
|
||||||
|
help='Batch size for training')
|
||||||
FLAGS, unparsed = parser.parse_known_args()
|
FLAGS, unparsed = parser.parse_known_args()
|
||||||
tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
|
tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
|
||||||
|
Loading…
Reference in New Issue
Block a user