diff --git a/README.md b/README.md
index 73ceaa2..7e682eb 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,12 @@
# Tiny Language Models Framework
-This repository contains the implementation and resources for the Tiny Language Models Framework project. In this project, we developed small-scale language models to facilitate detailed research into various aspects of large language models (LLMs), particularly in the domain of code.
+This repository contains the implementation and resources for the Tiny Language Models Framework project. In this project, we developed small-scale language models to facilitate detailed research into various aspects of large language models (LLMs), particularly in the domain of code.
+
+We've also prepared a [TinyLM Starter Notebook on Kaggle](https://www.kaggle.com/code/nairmarwa/tinylm-starter-notebook). This notebook is designed to help you get started quickly with our project. It guides you through training a tiny language model from scratch using our dataset and evaluating its performance on code execution tasks.
## Project Structure
@@ -33,7 +39,11 @@ This repository contains the implementation and resources for the Tiny Language
- `demonstration.ipynb` : Jupyter notebook demonstrating the usage of the models and scripts.
-- `eval.py` : Script to evaluate the trained models.
+- `code_execution.py` : Script to evaluate the trained models on the code execution task.
+
+- `token-level_code_completion.py` : Script to evaluate the trained models on the token-level code completion task.
+
+- `line-level_code_completion.py` : Script to evaluate the trained models on the line-level code completion task.
- `model.py` : Contains the model architecture and related functions.
@@ -42,6 +52,7 @@ This repository contains the implementation and resources for the Tiny Language
- `train.py` : Script to train the models.
## Requirements
+We've used Python 3.11.7.
To install the required packages, you can use the following:
@@ -59,15 +70,15 @@ cd data/
python tinypy_generator.py --num_programs 1000 --level 1.1 --filename sample_data.txt --deduplicate
```
+This generation command is just an example to get you started. If you want to train your own model, you'll likely need to generate significantly more data.
+
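+For example, a run closer to training scale might look like the following (illustrative values; the output filename is arbitrary):
+
+```bash
+python tinypy_generator.py --num_programs 1000000 --level 1.1 --filename train_data.txt --deduplicate
+```
+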
### Data Preparation
-Prepare the data by running:
+Prepare (tokenize and split) the data by running:
```bash
python prepare.py
```
-This generation command is just an example to get you started. If you want to train your own model, you'll likely need to generate significantly more data.
-
### Training
Train the model using the following command:
@@ -78,10 +89,22 @@ python train.py --batch_size 64 --max_iters 35000 --learning_rate 0.01 --miles 0
```
### Evaluation
-Evaluate the trained model by running:
+Evaluate the trained model on code execution by running:
```bash
-python eval.py --dataset_dir data --model_name arithmetics_level1_696K
+python code_execution.py --dataset_dir data --model_name arithmetics_level1_696K
+```
+
+Evaluate the trained model on token-level code completion by running:
+
+```bash
+python token-level_code_completion.py --dataset_dir data --model_name arithmetics_level1_696K
+```
+
+Evaluate the trained model on line-level code completion by running:
+
+```bash
+python line-level_code_completion.py --dataset_dir data --model_name arithmetics_level1_696K
```
### Demonstration
@@ -108,9 +131,14 @@ python evaluate.py --checkpoint_dir models/code-llama-finetuned-level1 --test_fi
#### Demonstration
To see a demonstration of the model's capabilities, open the generalization/demonstration.ipynb notebook and follow the instructions within.
+# Contact
+
+- **Kamel Yamani**: [mky2023@nyu.edu](mailto:mky2023@nyu.edu)
+- **Marwa Naïr**: [mn3620@nyu.edu](mailto:mn3620@nyu.edu)
+
# License
This project is licensed under the MIT License.
# Acknowledgements
-Special thanks to all contributors and the community for their support and contribution
+This work was supported in part through the NYU IT High Performance Computing resources, services, and staff expertise.
diff --git a/datasets/dataset-1/.readme.md b/datasets/dataset-1/.readme.md
new file mode 100644
index 0000000..e192ed1
--- /dev/null
+++ b/datasets/dataset-1/.readme.md
@@ -0,0 +1,16 @@
+# DATA DESCRIPTION:
+- Around 1M code snippets generated with the full random code generator script
+
+# DATA GENERATION:
+
+- dataset obtained by executing: python full_random_code_generator.py --output_file ./data/data.txt
+- python version 3.10.14
+- requires a Unix-based OS (Linux/macOS)
+
+# META-DATA:
+- code snippets discarded due to overflow errors and the like: 0.00%
+- code snippets discarded due to zero-division errors: 0.94%
+- random state stored in frcg-random-states
+
+# DATA LOCATION:
+- Not yet uploaded
\ No newline at end of file
diff --git a/datasets/dataset-1/frcg-random-states/random_state_2024-09-14_22-39.bin b/datasets/dataset-1/frcg-random-states/random_state_2024-09-14_22-39.bin
new file mode 100644
index 0000000..54e7c59
Binary files /dev/null and b/datasets/dataset-1/frcg-random-states/random_state_2024-09-14_22-39.bin differ
diff --git a/datasets/dataset-1/full_random_code_generator.py b/datasets/dataset-1/full_random_code_generator.py
new file mode 100644
index 0000000..ae4067c
--- /dev/null
+++ b/datasets/dataset-1/full_random_code_generator.py
@@ -0,0 +1,487 @@
+import random
+import re
+from tqdm import tqdm
+from io import StringIO
+from contextlib import redirect_stdout
+import pickle
+import argparse
+import datetime
+import multiprocessing as mp
+from time import sleep
+import signal
+from pathlib import Path
+
+
+cfg_rules = {
+ # Variables and digits
+ "VARIABLE": ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z" ],
+ "DIGIT": [str(i) for i in range(256)],
+
+ # Operators
+ "ARITHMETIC_OPERATOR": ["+", "-", "/", "*"],
+ "RELATIONAL_OPERATOR": ["<", ">", "<=", ">=", "!=", "=="],
+ "LOGICAL_OPERATOR_INFIX": ["and", "or"],
+ "LOGICAL_OPERATOR_PREFIX": ["not"],
+ "LOGICAL_OPERATOR": ["LOGICAL_OPERATOR_INFIX", "LOGICAL_OPERATOR_PREFIX"],
+ "OPERATOR": ["ARITHMETIC_OPERATOR"],
+
+ # Formatting
+ "NEW_LINE": ["\n"],
+ "TAB_INDENT": ["\t"],
+ "BRACKET_OPEN": ['('],
+ "BRACKET_CLOSE": [')'],
+ "EQUALS": ["="],
+ "COLON": [":"],
+ "COMMA": [","],
+
+ # Keywords
+ "IF": ["if"],
+ "ELIF": ["elif"],
+ "ELSE": ["else"],
+ "FOR": ["for"],
+ "IN": ["in"],
+ "RANGE": ["range"],
+ "WHILE": ["while"],
+ "PRINT": ["print"],
+
+ # Initializations and assignments
+ "IDENTIFIER_INITIALIZATION": ["IDENTIFIER_INITIALIZATION INITIALIZATION",
+ "INITIALIZATION"],
+
+ "INITIALIZATION": ["VARIABLE SPACE EQUALS SPACE DIGIT NEW_LINE"],
+
+ "SIMPLE_ASSIGNMENT": ["A_VARIABLE SPACE EQUALS SPACE EXPRESSION NEW_LINE"],
+
+ "ADVANCED_ASSIGNMENT": ["A_VARIABLE SPACE EQUALS SPACE SIMPLE_ARITHMETIC_EVALUATION NEW_LINE"],
+
+ "SIMPLE_ARITHMETIC_EVALUATION": ["SIMPLE_ARITHMETIC_EVALUATION ARITHMETIC_OPERATOR ENCLOSED_EXPRESSION",
+ "ENCLOSED_EXPRESSION",
+ ],
+
+ # Terms and expressions
+ "TERM": ["EXPRESSION_IDENTIFIER", "DIGIT"],
+ "EXPRESSION": ["TERM SPACE OPERATOR SPACE TERM"],
+ "ENCLOSED_EXPRESSION": ["BRACKET_OPEN EXPRESSION BRACKET_CLOSE"],
+ "DISPLAY_EXPRESSION": ["EXPRESSION_IDENTIFIER SPACE OPERATOR SPACE EXPRESSION_IDENTIFIER",
+ "EXPRESSION_IDENTIFIER SPACE OPERATOR SPACE DIGIT"],
+
+ # Conditions
+ "SIMPLE_IF_STATEMENT": ["IF SPACE CONDITION SPACE COLON NEW_LINE"],
+ "ADVANCED_IF_STATEMENT": ["IF SPACE CHAIN_CONDITION SPACE COLON NEW_LINE"],
+ "SIMPLE_ELIF_STATEMENT": ["ELIF SPACE CONDITION SPACE COLON NEW_LINE"],
+ "ADVANCED_ELIF_STATEMENT": ["ELIF SPACE CHAIN_CONDITION SPACE COLON NEW_LINE"],
+ "ELSE_STATEMENT": ["ELSE SPACE COLON NEW_LINE"],
+
+ "CHAIN_CONDITION": ["CHAIN_CONDITION SPACE LOGICAL_OPERATOR_INFIX SPACE ENCLOSED_CONDITION",
+ "LOGICAL_OPERATOR_PREFIX SPACE ENCLOSED_CONDITION",
+ "ENCLOSED_CONDITION"],
+ "ENCLOSED_CONDITION": ["BRACKET_OPEN CONDITION BRACKET_CLOSE"],
+ "CONDITION": ["OPTIONAL_NOT CONDITION_EXPRESSION", "CONDITION_EXPRESSION"],
+ "CONDITION_EXPRESSION": ["EXPRESSION_IDENTIFIER SPACE RELATIONAL_OPERATOR SPACE EXPRESSION_IDENTIFIER",
+ "EXPRESSION_IDENTIFIER SPACE RELATIONAL_OPERATOR SPACE DIGIT"],
+ "OPTIONAL_NOT": ["LOGICAL_OPERATOR_PREFIX SPACE", "SPACE"],
+
+ # For loops
+ "FOR_HEADER": ["FOR SPACE VARIABLE SPACE IN SPACE RANGE BRACKET_OPEN INITIAL COMMA SPACE FINAL COMMA SPACE STEP BRACKET_CLOSE SPACE COLON NEW_LINE",
+ "FOR SPACE VARIABLE SPACE IN SPACE RANGE BRACKET_OPEN INITIAL COMMA SPACE FINAL BRACKET_CLOSE SPACE COLON NEW_LINE"],
+ "INITIAL": ["DIGIT"],
+
+ "FOR_LOOP": ["FOR_HEADER NEW_LINE TAB_INDENT DISPLAY"],
+ "ADVANCED_FOR_LOOP": ["FOR_LOOP",
+ "FOR_HEADER NEW_LINE TAB_INDENT ADVANCED_DISPLAY"],
+
+ # While
+ "WHILE_LOOP_LESS": ["WHILE_HEADER_LESS TAB_INDENT UPDATE_LESS"],
+ "WHILE_HEADER_LESS": ["WHILE_CONTROL_INITIALIZATION WHILE SPACE CONDITION_EXPRESSION_LESS SPACE COLON NEW_LINE"],
+ "CONDITION_EXPRESSION_LESS": ["EXPRESSION_IDENTIFIER_WHILE SPACE RELATIONAL_OPERATOR_LESS SPACE FINAL_LESS"],
+ "UPDATE_LESS": ["WHILE_IDENTIFIER SPACE EQUALS SPACE WHILE_IDENTIFIER SPACE + SPACE STEP NEW_LINE"],
+ "RELATIONAL_OPERATOR_LESS": [ "<", "<="],
+
+ "WHILE_LOOP_GREATER": ["WHILE_HEADER_GREATER TAB_INDENT UPDATE_GREATER"],
+ "WHILE_HEADER_GREATER": ["WHILE_CONTROL_INITIALIZATION WHILE SPACE CONDITION_EXPRESSION_GREATER SPACE COLON NEW_LINE"],
+ "CONDITION_EXPRESSION_GREATER": ["EXPRESSION_IDENTIFIER_WHILE SPACE RELATIONAL_OPERATOR_GREATER SPACE FINAL_GREATER"],
+ "UPDATE_GREATER": ["WHILE_IDENTIFIER SPACE EQUALS SPACE WHILE_IDENTIFIER SPACE - SPACE STEP NEW_LINE"],
+ "RELATIONAL_OPERATOR_GREATER": [">", ">="],
+
+ "WHILE_CONTROL_INITIALIZATION": ["VARIABLE SPACE EQUALS SPACE DIGIT NEW_LINE"],
+
+ # Displaying
+ "DISPLAY" : ["PRINT BRACKET_OPEN DISPLAY_IDENTIFIER BRACKET_CLOSE NEW_LINE"],
+ "ADVANCED_DISPLAY" : ["DISPLAY",
+ "PRINT BRACKET_OPEN DISPLAY_EXPRESSION BRACKET_CLOSE NEW_LINE"],
+ # Temporary ...
+ "END" : [""]
+}
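+
+# Example derivation (illustrative): "INITIALIZATION" expands to "VARIABLE SPACE EQUALS SPACE DIGIT NEW_LINE";
+# its terminals might resolve to "aSPACE=SPACE42\n", and the caller's .replace("SPACE", " ") yields the line: a = 42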
+pattern_vocabulary = {
+ "INITIALIZATION",
+ "SIMPLE_ASSIGNMENT",
+ "ADVANCED_ASSIGNMENT",
+ "SIMPLE_IF_STATEMENT",
+ "SIMPLE_ELIF_STATEMENT",
+ "ELSE_STATEMENT",
+ "WHILE_LOOP_LESS",
+ "WHILE_LOOP_GREATER",
+ "FOR_HEADER",
+ "DISPLAY",
+ "ADVANCED_DISPLAY"
+}
+
+loop_statements = {
+ "WHILE_LOOP_LESS",
+ "WHILE_LOOP_GREATER",
+ "FOR_HEADER",
+}
+
+conditional_statements = {
+ "SIMPLE_IF_STATEMENT",
+ "SIMPLE_ELIF_STATEMENT",
+}
+
+indentation_statements = {
+ "WHILE_LOOP_LESS",
+ "WHILE_LOOP_GREATER",
+ "FOR_HEADER",
+ "SIMPLE_IF_STATEMENT",
+ "SIMPLE_ELIF_STATEMENT",
+ "ELSE_STATEMENT"
+}
+
+non_indentation_statements = pattern_vocabulary - indentation_statements
+
+variable_creation_statements = {
+ "INITIALIZATION",
+ "SIMPLE_ASSIGNMENT",
+ "ADVANCED_ASSIGNMENT",
+ "WHILE_LOOP_LESS",
+ "WHILE_LOOP_GREATER",
+ "FOR_HEADER",
+}
+
+pattern_vocab_for_regex = "|".join(pattern_vocabulary)
+
+def generate_code(symbol, assigned_identifiers:list, x:float, for_init_step)->str:
+ """
+ Generate code recursively based on the context-free grammar rules.
+
+    Parameters:
+    - symbol (str): The symbol to generate code for.
+    - assigned_identifiers (list): Variables assigned so far, shared across recursive calls.
+    - x (float): How much more likely writing to an existing variable is than creating a new one.
+    - for_init_step (dict): Scratch dictionary holding a loop's control variable, initial value and step.
+
+ Returns:
+ - str: The generated code.
+ """
+
+ # If the symbol is a non-terminal <--> it's a production rule (PR)
+ if symbol in cfg_rules:
+ # We develop the PR
+ rule = random.choice(cfg_rules[symbol])
+ symbols = rule.split(" ")
+ # We call the generate code function to get the string associated with this PR
+ generated_symbols = [generate_code(s, assigned_identifiers, x, for_init_step) for s in symbols]
+ res_string = ''.join(generated_symbols)
+        # If it's an INITIAL=>DIGIT PR, we record the DIGIT=>0..255 value in the for_init_step dictionary (will be used when calculating the FINAL of the for loop)
+ if symbol == "INITIAL":
+ init = generated_symbols[0]
+ for_init_step["initial_value"] = init
+        # Elif it's a variable-creating PR, we record the generated VARIABLE name in the assigned_identifiers list
+ elif symbol in variable_creation_statements:
+ if symbol == "FOR_HEADER":
+ variable_name = generated_symbols[2]
+ else:
+ variable_name = res_string[0]
+ assigned_identifiers.append(variable_name)
+ elif symbol == "WHILE_CONTROL_INITIALIZATION":
+ for_init_step["initial_var"] = generated_symbols[0]
+ for_init_step["initial_value"] = generated_symbols[4]
+ # Concatenate the generated_sub_codes and return the resulting sub_code
+ return res_string
+
+ # Else the symbol is a (meta-)terminal, a terminal being one that is returned as is (the simplest case), and a meta-terminal must be generated based on past generations
+    # If EXPRESSION_IDENTIFIER (like we find in ASSIGNMENTS, DISPLAYS, and FOR loops), we choose randomly among the previously initialized variables, falling back to a random DIGIT when none exist
+    # NOTE: FOR loops don't require the control variable to be initialized -> this could be a point of generalization
+    if symbol == "EXPRESSION_IDENTIFIER":
+        identifier = random.choice(assigned_identifiers) if assigned_identifiers else random.choice(cfg_rules["DIGIT"])
+        return identifier
+ # If EXPRESSION_IDENTIFIER_WHILE (i.e. "the declaration" of the control variable of the while loop)
+ # NOTE: this one contrary to for loop ... must be one of the existing initialized variables
+ if symbol == "EXPRESSION_IDENTIFIER_WHILE":
+ return for_init_step["initial_var"]
+ # If WHILE_IDENTIFIER (i.e. the "update" of the control variable of the while loop), get it from the for_init_step dictionary (filled by the EXPRESSION_IDENTIFIER_WHILE meta-terminal)
+ if symbol == "WHILE_IDENTIFIER":
+ return for_init_step.get("initial_var", "*")
+    # If the symbol is a FINAL (for the for loop) or FINAL_LESS (for the "while <=" loop), choose a step and a number of executions, compute the FINAL/FINAL_LESS bound using the for_init_step dict, and record the step as it will be needed later to fill the STEP meta-terminal
+ if (symbol == "FINAL") or (symbol == "FINAL_LESS"):
+ initial_value = for_init_step.get("initial_value", "0")
+ # Generate valid step_value and execution_count
+ valid_values = [(1, 2), (2, 1), (2, 2), (2, 3), (3, 2)]
+ step_value, execution_count = random.choice(valid_values)
+ for_init_step["step"] = str(step_value)
+ final_value = step_value * execution_count + int(initial_value) - 1
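+        # Worked example: initial_value = 5 with (step_value, execution_count) = (2, 3) gives
+        # final_value = 2*3 + 5 - 1 = 10, so range(5, 10, 2) yields 5, 7, 9: exactly 3 iterations
+        # ("while i < 10" or "while i <= 10" with i += 2 also iterates exactly 3 times)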
+ return str(final_value)
+ # Same thing as for the one before but this one is only meant for the while loop
+ if symbol == "FINAL_GREATER":
+ initial_value = for_init_step.get("initial_value", "0")
+ # Generate valid step_value and execution_count
+ valid_values = [(1, 2), (2, 1), (2, 2), (2, 3), (3, 2)]
+ step_value, execution_count = random.choice(valid_values)
+ for_init_step["step"] = str(step_value)
+ final_value = int(initial_value) - step_value * execution_count + 1
+ return str(final_value)
+ # If the STEP meta variable, fill it with the for_init_step dict
+ if symbol == "STEP":
+ return for_init_step.get("step", "0")
+
+ # If the symbol is an assigned variable, we try to write to an existing variable instead of creating new ones with a probability "x" times greater
+ if symbol == "A_VARIABLE":
+ # In case there are available readable and writable identifiers
+ if (read_write_vars := list(set(assigned_identifiers) & set(cfg_rules["VARIABLE"]))):
+ alpha = len(assigned_identifiers) / len(cfg_rules["VARIABLE"])
+ p = ((1-alpha)*x - alpha)/((1-alpha)*(1+x))
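+            # One way to read this formula: with alpha the fraction of writable variables already
+            # assigned, the fallback draw below hits an existing variable with probability alpha, so
+            # P(existing) = p + (1-p)*alpha and P(new) = (1-p)*(1-alpha); requiring
+            # P(existing) = x * P(new) and solving for p gives the expression above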
+ # We return an existing read_write_var with the appropriate probability
+ if random.random() < p:
+ return random.choice(read_write_vars)
+ # In case there is no read_write_var or the probability failed
+ return random.choice(cfg_rules["VARIABLE"])
+
+    # If DISPLAY_IDENTIFIER, fill it with a randomly chosen assigned variable, or a random DIGIT if none exists
+ if symbol == "DISPLAY_IDENTIFIER":
+ try:
+ return f"{random.choice(assigned_identifiers)}"
+ except Exception:
+ return random.choice(cfg_rules["DIGIT"])
+    # If none of the above, i.e. it's a terminal (not a meta-terminal)
+ return symbol
+
+# Regular expressions
+re_pattern_line_parser = re.compile("(\t*)("+pattern_vocab_for_regex+")(:[^,=]+=[^,=]+(?:,[^,=]+=[^,=]+)*$|$)")
+re_general_line_finder = re.compile(".+(?:\n|$)")
+re_while_identifier = re.compile(".*\nwhile ([a-z])")
+max_depth = 3
+max_sub_blocks = 3
+
+def distribution_controller(min_init,
+ min_length,
+ max_length,
+ line_counter,
+ context_stack)->dict:
+
+    # If the line_counter is at most min_init we return an INITIALIZATION
+ if line_counter <= min_init:
+ return {"INITIALIZATION": 1.0}
+
+ # Elif it's above max_length
+ if line_counter > max_length:
+        # If we can end the code here, i.e. we aren't at the beginning of an indentation block (for now the while loop is not considered ...)
+ if context_stack[-1]["nb_lines_in_block"] != 0:
+ return {"END":1.0}
+ # Else we return a distribution over the statements which do not require an indentation
+ uniproba = 1/len(non_indentation_statements)
+ return {keyword : uniproba for keyword in non_indentation_statements}
+
+ ## In other cases i.e. min_init < line_counter <= max_length
+
+ # We set the potential keywords
+ potential_keywords = set(pattern_vocabulary)
+
+ # In case we achieved max_depth or max_sub_blocks inside the current context we remove the indentation statements
+ if len(context_stack) - 1 >= max_depth or context_stack[-1]["nb_sub_blocks"] >= max_sub_blocks:
+ potential_keywords.difference_update(indentation_statements)
+
+ # In case we are not in an If statement we remove the elif + else
+ elif not context_stack[-1]["if_statement"]:
+ potential_keywords.difference_update({"SIMPLE_ELIF_STATEMENT", "ELSE_STATEMENT"})
+
+    # We add the END keyword if we are not at the beginning of an indentation block
+ if context_stack[-1]["nb_lines_in_block"] != 0:
+ potential_keywords.add("END")
+
+ # We return a uniform distribution over the remaining keywords
+ uniproba = 1/len(potential_keywords)
+ return {potential_keyword: uniproba for potential_keyword in potential_keywords}
+
+
+def generate_random_code(min_init = 2,
+ min_length = 2,
+ max_length = 15,
+ max_init_count = 3,
+ decay_factor = 0.5,
+ x = 2
+ ):
+
+ # We create the code_lines list, the context_stack and initialize it
+ code_lines = list()
+ context_stack = list()
+ context_stack.append(
+ {
+ "nb_sub_blocks": 0,
+ "if_statement": False,
+ "readable_variables": list(),
+ "writable_variables": list(cfg_rules["VARIABLE"]),
+ "nb_lines_in_block": 0,
+ }
+ )
+
+    # We set the line_counter to 1 and new_pattern_line to the empty string
+ line_counter = 1
+ new_pattern_line = ""
+
+    # While we haven't reached the END keyword
+ while new_pattern_line != "END":
+
+ # We get the distribution from the distribution controller
+ new_distribution = distribution_controller(min_init, min_length, max_length, line_counter, context_stack)
+
+        # We sample a keyword from the distribution (uniform over the allowed statements)
+ new_pattern_line = random.choices(list(new_distribution.keys()), list(new_distribution.values()))[0]
+
+ # We set the "VARIABLES" PR to the current context
+ cfg_rules["VARIABLE"] = context_stack[-1]["writable_variables"]
+
+ # We generate the code using the grammar
+ new_code_line = generate_code(new_pattern_line, context_stack[-1]["readable_variables"], x, dict()).replace("SPACE", " ")
+
+        # We indent every line of the generated snippet to the current depth and append it to code_lines (think about replacing this one with the random expression)
+ code_lines.append("\n".join([(len(context_stack)-1) * "\t" + new_code_line for new_code_line in new_code_line.split("\n")[:-1]])+"\n")
+
+ ## Update the context
+
+ # Update the if statement state of the context
+ if new_pattern_line in conditional_statements:
+ context_stack[-1]["if_statement"] = True
+ else:
+ context_stack[-1]["if_statement"] = False
+
+ # Update the number of sub loops in the context
+ if new_pattern_line in indentation_statements:
+ context_stack[-1]["nb_sub_blocks"] += 1
+
+ # Update the number of code lines in the context
+ lines_to_add = 3 if new_pattern_line in ("WHILE_LOOP_LESS", "WHILE_LOOP_GREATER") else 1
+ context_stack[-1]["nb_lines_in_block"] += lines_to_add
+ line_counter += lines_to_add
+
+ # In case where we have to indent like for the for loop, while loop and conditionals
+ if new_pattern_line in indentation_statements:
+ new_writable_variables = context_stack[-1]["writable_variables"]
+
+ # If the indentation statement is a while loop, we remove the control variable from the writable variables
+ if new_pattern_line in ("WHILE_LOOP_LESS", "WHILE_LOOP_GREATER"):
+ while_control_variable = re_while_identifier.match(new_code_line).group(1)
+ new_writable_variables = list(new_writable_variables)
+ new_writable_variables.remove(while_control_variable)
+
+ # We stack the new indentation level
+ context_stack.append({
+ "nb_sub_blocks": 0,
+ "if_statement": False,
+ "readable_variables": list(context_stack[-1]["readable_variables"]),
+ "writable_variables": new_writable_variables,
+ "nb_lines_in_block": 0,
+ })
+
+ # Else in case where we might un-indent or stay
+ else:
+            # In case we don't stay, i.e. we un-indent: the staying probability decays geometrically
+            # with the number of lines already in the block; on un-indent we pop the stack and add the
+            # block's line count to the enclosing context
+            while len(context_stack)>1 and random.random() > decay_factor ** context_stack[-1]["nb_lines_in_block"]:
+                last_context = context_stack.pop()
+                context_stack[-1]["nb_lines_in_block"] += last_context["nb_lines_in_block"]
+
+ #>> END OF WHILE LOOP: while new_pattern_line != "END"
+
+    # We replace the last code_lines entry (the empty END line) with a display/advanced_display statement so every program ends with a print
+ code_lines[-1] = generate_code(
+ symbol = random.choice(("DISPLAY", "ADVANCED_DISPLAY")),
+ assigned_identifiers = context_stack[0]["readable_variables"],
+ x = x,
+ for_init_step = None
+ ).replace("SPACE", " ")
+
+ # We join the code_lines to obtain the final code
+ code = "".join(code_lines)
+
+ # We set the VARIABLE PR back to its original state
+ cfg_rules["VARIABLE"] = context_stack[0]["writable_variables"]
+
+ return code
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description = "Full Random TinyPy Generator")
+ parser.add_argument("--random_state", help = "Random state to be loaded if any")
+ parser.add_argument("--nb_programs", default = 1000000, help = "Number of programs to be generated")
+ parser.add_argument("--output_file", default = "./data.txt", help = "Number of programs to be generated")
+ parser.add_argument("--timeout", default = 2, help = "Number of seconds to wait for a process to terminate")
+ args = parser.parse_args()
+
+    random_state = args.random_state
+    output_file = args.output_file
+    nb_programs = int(args.nb_programs)
+    timeout = int(args.timeout)
+
+    # Making sure the output file exists by creating it beforehand
+ output_file_path_object = Path(output_file)
+ output_file_path_object.parent.mkdir(parents = True, exist_ok = True)
+ output_file_path_object.touch(exist_ok = True)
+
+ # Saving or setting the random state
+ if args.random_state is None:
+ random_state = random.getstate()
+ now = datetime.datetime.now()
+ date_hour = now.strftime("%Y-%m-%d_%H-%M")
+ Path("./frcg-random-states").mkdir(parents = True, exist_ok = True)
+ with open(f"frcg-random-states/random_state_{date_hour}.bin", "wb") as f:
+ pickle.dump(random_state, f)
+ else:
+ with open(args.random_state, "rb") as f:
+ random_state = pickle.load(f)
+ random.setstate(random_state)
+
+ ## Launching the generation
+ class TimeoutException(Exception):
+ pass
+
+ def timeout_handler(signum, frame):
+ raise TimeoutException()
+
+ signal.signal(signal.SIGALRM, timeout_handler)
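+    # NOTE: signal.SIGALRM is Unix-only, hence the Linux/macOS requirement in the dataset readme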
+
+ f = open(output_file, "w")
+
+ nb_timeouts = 0
+ nb_zero_divisions = 0
+
+ # Launching the loop
+ for i in tqdm(range(nb_programs)):
+ code = generate_random_code()
+ sio = StringIO()
+ try:
+ with redirect_stdout(sio):
+ signal.alarm(timeout)
+ exec(code, dict())
+ output = sio.getvalue()
+ except ZeroDivisionError:
+ output = "ZeroDivisionError"
+ nb_zero_divisions += 1
+ except ValueError:
+ nb_timeouts += 1
+ output = "ValueError"
+ except OverflowError:
+ nb_timeouts += 1
+ output = "OverflowError"
+ except TimeoutException as e:
+ nb_timeouts += 1
+ output = "TimeoutError"
+ finally:
+ signal.alarm(0)
+
+ f.write(code + "# output\n# " + "\n# ".join(output.split("\n")[:-1]) + "\n\n")
+
+ print(f"percentage of timeouts: {nb_timeouts/nb_programs * 100:.2f}%")
+ print(f"percentage of zero divisions: {nb_zero_divisions/nb_programs * 100:.2f}%")
+
+ f.close()
\ No newline at end of file
diff --git a/datasets/dataset-2/.readme.md b/datasets/dataset-2/.readme.md
new file mode 100644
index 0000000..5991bb0
--- /dev/null
+++ b/datasets/dataset-2/.readme.md
@@ -0,0 +1,16 @@
+# DATA DESCRIPTION:
+- Exactly 10M code snippets generated with the new version of the full random code generator script, which keeps generating until the exact requested number of snippets is reached (instead of just discarding failures) and which checks for duplicates
+
+# DATA GENERATION:
+
+- dataset obtained by executing: python full_random_code_generator.py --nb_programs 10000000 --log_interval 100000 --programs_separator "# code" --output_file ./data/data.txt
+- python version 3.10.14
+- requires a Unix-based OS (Linux/macOS)
+
+# META-DATA:
+- code snippets discarded due to overflow errors and the like: 0.00%
+- code snippets discarded due to zero-division errors: 0.94%
+- random state stored in frcg-random-states
+
+# DATA LOCATION:
+- Not yet uploaded
\ No newline at end of file
diff --git a/datasets/dataset-2/datapreps/dataprep-1/data-preping-log.txt b/datasets/dataset-2/datapreps/dataprep-1/data-preping-log.txt
new file mode 100644
index 0000000..bceb648
--- /dev/null
+++ b/datasets/dataset-2/datapreps/dataprep-1/data-preping-log.txt
@@ -0,0 +1,40 @@
+
+Saving the numpy random state:
+ --> saving it
+ --> freeing its memory
+
+Loading the dataset:
+
+Get all the unique characters that occur in this text:
+ --> all the unique characters: '\t\n !#%()*+,-./0123456789:<=>abcdefghijklmnopqrstuvwxyz'
+ --> vocab size: 54
+
+Create a mapping from characters to integers:
+
+Save the meta information as well, to help us encode/decode later:
+ --> freeing its memory
+
+Split by examples using \n\n:
+ --> splitting
+ --> freeing data memory
+ --> total number of examples: 12,000,000
+
+
+Shuffle the examples and split into train, test and val:
+ --> shuffling
+ --> freeing examples memory
+
+Join the examples back into strings:
+
+Save train, val, and test sets to separate files:
+
+We define the encoding function:
+
+Encode both to integers:
+ --> encoding train_data
+ --> encoding val_data
+ --> train has 2,193,497,505 tokens for 9,600,000 examples
+ --> val has 273,765,818 tokens for 1,200,000 examples
+
+Export to bin files:
diff --git a/datasets/dataset-2/datapreps/dataprep-1/data-preping.py b/datasets/dataset-2/datapreps/dataprep-1/data-preping.py
new file mode 100644
index 0000000..fae7bd0
--- /dev/null
+++ b/datasets/dataset-2/datapreps/dataprep-1/data-preping.py
@@ -0,0 +1,135 @@
+# Data prepping (on Greene)
+DIR = "/scratch/yb2618/Tiny-Language-Models-Framework/datasets/dataset-2/datapreps/dataprep-1/"
+
+import pickle
+import numpy as np
+import gc
+
+# Logging boilerplate
+log_file = open(DIR+"data-preping-log.txt", "w")
+def log(s:str):
+ if s[0].upper() == s[0]:
+ start = "\n"
+ end = ":"
+ else:
+ start = " --> "
+ end = ""
+ log_file.write(start + s + end + "\n")
+ log_file.flush()
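+# For example, log("Loading the dataset") writes a section header ("\nLoading the dataset:"),
+# while log("splitting") writes an indented sub-step (" --> splitting"), matching the log file above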
+
+# Saving the numpy random state
+log("Saving the numpy random state")
+log("saving it")
+np_random_state = np.random.get_state()
+with open(DIR+"data/np-random-state.bin", "wb") as f:
+ pickle.dump(np_random_state, f)
+log("freeing its memory")
+del np_random_state
+gc.collect()
+
+# Loading the dataset
+log("Loading the dataset")
+with open(DIR+"datasets/dataset-2/data/data.txt", "r") as f:
+ data = f.read()
+
+# Get all the unique characters that occur in this text
+log("Get all the unique characters that occur in this text")
+chars = sorted(list(set(data)))
+vocab_size = len(chars)
+log("all the unique characters: " + repr(''.join(chars)))
+log(f"vocab size: {vocab_size:,}")
+
+# Create a mapping from characters to integers
+log("Create a mapping from characters to integers")
+stoi = { ch:i for i,ch in enumerate(chars) }
+itos = { i:ch for i,ch in enumerate(chars) }
+
+# Save the meta information as well, to help us encode/decode later
+log("Save the meta information as well, to help us encode/decode later")
+meta = {
+ 'vocab_size': vocab_size,
+ 'itos': itos,
+ 'stoi': stoi,
+}
+with open(DIR+'data/meta.pkl', 'wb') as f:
+ pickle.dump(meta, f)
+log("freeing its memory")
+del meta
+gc.collect()
+
+# Split by examples using \n\n
+log("Split by examples using \\n\\n")
+log("splitting")
+examples = data.split("\n\n")[:-1]
+log("freeing data memory")
+del data
+gc.collect()
+n = len(examples)
+log(f"total number of examples: {n:,}\n")
+
+# Shuffle the examples and split into train, test and val
+log("Shuffle the examples and split into train, test and val")
+log("shuffling")
+np.random.shuffle(examples)
+train_examples = examples[:int(n*0.8)]
+val_examples = examples[int(n*0.8):int(n*0.9)]
+test_examples = examples[int(n*0.9):]
+log("freeing examples memory")
+del examples
+gc.collect()
+
+# Join the examples back into strings
+log("Join the examples back into strings")
+train_data = "\n\n".join(train_examples)
+train_examples_len = len(train_examples)
+del train_examples
+gc.collect()
+val_data = "\n\n".join(val_examples)
+val_examples_len = len(val_examples)
+del val_examples
+gc.collect()
+test_data = "\n\n".join(test_examples)
+test_examples_len = len(test_examples)
+del test_examples
+gc.collect()
+
+# Save the train, val, and test sets to separate files
+log("Save train, val, and test sets to separate files")
+with open(DIR+"data/train.txt", 'w') as f:
+ f.write(train_data)
+with open(DIR+"data/val.txt", 'w') as f:
+ f.write(val_data)
+with open(DIR+"data/test.txt", 'w') as f:
+ f.write(test_data)
+
+# We define the encoding function
+log("We define the encoding function")
+def encode(s:str)->list:
+ return [stoi[c] for c in s]
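+# e.g. encode("a = 5") -> [stoi['a'], stoi[' '], stoi['='], stoi[' '], stoi['5']]: one integer id per character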
+
+# Encode both to integers
+log("Encode both to integers")
+log("encoding train_data")
+train_ids = encode(train_data)
+del train_data
+gc.collect()
+log("encoding val_data")
+val_ids = encode(val_data)
+del val_data
+gc.collect()
+
+log(f"train has {len(train_ids):,} tokens for {train_examples_len:,} examples")
+log(f"val has {len(val_ids):,} tokens for {val_examples_len:,} examples")
+
+# Export to bin files
+log("Export to bin files\n")
+train_ids = np.array(train_ids, dtype=np.uint16)
+train_ids.tofile(DIR+"data/train.bin")
+del train_ids
+gc.collect()
+val_ids = np.array(val_ids, dtype=np.uint16)
+val_ids.tofile(DIR+"data/val.bin")
+del val_ids
+gc.collect()
+
+log_file.close()
\ No newline at end of file
diff --git a/datasets/dataset-2/frcg-random-states/random_state_2024-09-17_10-34.bin b/datasets/dataset-2/frcg-random-states/random_state_2024-09-17_10-34.bin
new file mode 100644
index 0000000..49eeca5
Binary files /dev/null and b/datasets/dataset-2/frcg-random-states/random_state_2024-09-17_10-34.bin differ
diff --git a/datasets/dataset-2/full_random_code_generator.py b/datasets/dataset-2/full_random_code_generator.py
new file mode 100644
index 0000000..3e095ab
--- /dev/null
+++ b/datasets/dataset-2/full_random_code_generator.py
@@ -0,0 +1,557 @@
+import random
+import re
+# from tqdm import tqdm
+from io import StringIO
+from contextlib import redirect_stdout
+import pickle
+import argparse
+import datetime
+import multiprocessing as mp
+from time import sleep
+import signal
+import hashlib
+from pathlib import Path
+
+cfg_rules = {
+ # Variables and digits
+ "VARIABLE": ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z" ],
+ "DIGIT": [str(i) for i in range(256)],
+
+ # Operators
+ "ARITHMETIC_OPERATOR": ["+", "-", "/", "*", "%"],
+ "RELATIONAL_OPERATOR": ["<", ">", "<=", ">=", "!=", "=="],
+ "LOGICAL_OPERATOR_INFIX": ["and", "or"],
+ "LOGICAL_OPERATOR_PREFIX": ["not"],
+ "LOGICAL_OPERATOR": ["LOGICAL_OPERATOR_INFIX", "LOGICAL_OPERATOR_PREFIX"],
+ "OPERATOR": ["ARITHMETIC_OPERATOR"],
+
+ # Formatting
+ "NEW_LINE": ["\n"],
+ "TAB_INDENT": ["\t"],
+ "BRACKET_OPEN": ['('],
+ "BRACKET_CLOSE": [')'],
+ "EQUALS": ["="],
+ "COLON": [":"],
+ "COMMA": [","],
+
+ # Keywords
+ "IF": ["if"],
+ "ELIF": ["elif"],
+ "ELSE": ["else"],
+ "FOR": ["for"],
+ "IN": ["in"],
+ "RANGE": ["range"],
+ "WHILE": ["while"],
+ "PRINT": ["print"],
+
+ # Initializations and assignments
+ "IDENTIFIER_INITIALIZATION": ["IDENTIFIER_INITIALIZATION INITIALIZATION",
+ "INITIALIZATION"],
+
+ "INITIALIZATION": ["VARIABLE SPACE EQUALS SPACE DIGIT NEW_LINE"],
+
+ "SIMPLE_ASSIGNMENT": ["A_VARIABLE SPACE EQUALS SPACE EXPRESSION NEW_LINE"],
+
+ "ADVANCED_ASSIGNMENT": ["A_VARIABLE SPACE EQUALS SPACE SIMPLE_ARITHMETIC_EVALUATION NEW_LINE"],
+
+ "SIMPLE_ARITHMETIC_EVALUATION": ["SIMPLE_ARITHMETIC_EVALUATION ARITHMETIC_OPERATOR ENCLOSED_EXPRESSION",
+ "ENCLOSED_EXPRESSION",
+ ],
+
+ # Terms and expressions
+ "TERM": ["EXPRESSION_IDENTIFIER", "DIGIT"],
+ "EXPRESSION": ["TERM SPACE OPERATOR SPACE TERM"],
+ "ENCLOSED_EXPRESSION": ["BRACKET_OPEN EXPRESSION BRACKET_CLOSE"],
+ "DISPLAY_EXPRESSION": ["EXPRESSION_IDENTIFIER SPACE OPERATOR SPACE EXPRESSION_IDENTIFIER",
+ "EXPRESSION_IDENTIFIER SPACE OPERATOR SPACE DIGIT"],
+
+ # Conditions
+ "SIMPLE_IF_STATEMENT": ["IF SPACE CONDITION SPACE COLON NEW_LINE"],
+ "ADVANCED_IF_STATEMENT": ["IF SPACE CHAIN_CONDITION SPACE COLON NEW_LINE"],
+ "SIMPLE_ELIF_STATEMENT": ["ELIF SPACE CONDITION SPACE COLON NEW_LINE"],
+ "ADVANCED_ELIF_STATEMENT": ["ELIF SPACE CHAIN_CONDITION SPACE COLON NEW_LINE"],
+ "ELSE_STATEMENT": ["ELSE SPACE COLON NEW_LINE"],
+
+ "CHAIN_CONDITION": ["CHAIN_CONDITION SPACE LOGICAL_OPERATOR_INFIX SPACE ENCLOSED_CONDITION",
+ "LOGICAL_OPERATOR_PREFIX SPACE ENCLOSED_CONDITION",
+ "ENCLOSED_CONDITION"],
+ "ENCLOSED_CONDITION": ["BRACKET_OPEN CONDITION BRACKET_CLOSE"],
+ "CONDITION": ["OPTIONAL_NOT CONDITION_EXPRESSION", "CONDITION_EXPRESSION"],
+ "CONDITION_EXPRESSION": ["EXPRESSION_IDENTIFIER SPACE RELATIONAL_OPERATOR SPACE EXPRESSION_IDENTIFIER",
+ "EXPRESSION_IDENTIFIER SPACE RELATIONAL_OPERATOR SPACE DIGIT"],
+ "OPTIONAL_NOT": ["LOGICAL_OPERATOR_PREFIX SPACE", "SPACE"],
+
+ # For loops
+ "FOR_HEADER": ["FOR SPACE VARIABLE SPACE IN SPACE RANGE BRACKET_OPEN INITIAL COMMA SPACE FINAL COMMA SPACE STEP BRACKET_CLOSE SPACE COLON NEW_LINE",
+ "FOR SPACE VARIABLE SPACE IN SPACE RANGE BRACKET_OPEN INITIAL COMMA SPACE FINAL BRACKET_CLOSE SPACE COLON NEW_LINE"],
+ "INITIAL": ["DIGIT"],
+
+ "FOR_LOOP": ["FOR_HEADER NEW_LINE TAB_INDENT DISPLAY"],
+ "ADVANCED_FOR_LOOP": ["FOR_LOOP",
+ "FOR_HEADER NEW_LINE TAB_INDENT ADVANCED_DISPLAY"],
+
+ # While
+ "WHILE_LOOP_LESS": ["WHILE_HEADER_LESS TAB_INDENT UPDATE_LESS"],
+ "WHILE_HEADER_LESS": ["WHILE_CONTROL_INITIALIZATION WHILE SPACE CONDITION_EXPRESSION_LESS SPACE COLON NEW_LINE"],
+ "CONDITION_EXPRESSION_LESS": ["EXPRESSION_IDENTIFIER_WHILE SPACE RELATIONAL_OPERATOR_LESS SPACE FINAL_LESS"],
+ "UPDATE_LESS": ["WHILE_IDENTIFIER SPACE EQUALS SPACE WHILE_IDENTIFIER SPACE + SPACE STEP NEW_LINE"],
+ "RELATIONAL_OPERATOR_LESS": [ "<", "<="],
+
+ "WHILE_LOOP_GREATER": ["WHILE_HEADER_GREATER TAB_INDENT UPDATE_GREATER"],
+ "WHILE_HEADER_GREATER": ["WHILE_CONTROL_INITIALIZATION WHILE SPACE CONDITION_EXPRESSION_GREATER SPACE COLON NEW_LINE"],
+ "CONDITION_EXPRESSION_GREATER": ["EXPRESSION_IDENTIFIER_WHILE SPACE RELATIONAL_OPERATOR_GREATER SPACE FINAL_GREATER"],
+ "UPDATE_GREATER": ["WHILE_IDENTIFIER SPACE EQUALS SPACE WHILE_IDENTIFIER SPACE - SPACE STEP NEW_LINE"],
+ "RELATIONAL_OPERATOR_GREATER": [">", ">="],
+
+ "WHILE_CONTROL_INITIALIZATION": ["VARIABLE SPACE EQUALS SPACE DIGIT NEW_LINE"],
+
+ # Displaying
+ "DISPLAY" : ["PRINT BRACKET_OPEN DISPLAY_IDENTIFIER BRACKET_CLOSE NEW_LINE"],
+ "ADVANCED_DISPLAY" : ["DISPLAY",
+ "PRINT BRACKET_OPEN DISPLAY_EXPRESSION BRACKET_CLOSE NEW_LINE"],
+ # Temporary ...
+ "END" : [""]
+}
+pattern_vocabulary = {
+ "INITIALIZATION",
+ "SIMPLE_ASSIGNMENT",
+ "ADVANCED_ASSIGNMENT",
+ "SIMPLE_IF_STATEMENT",
+ "SIMPLE_ELIF_STATEMENT",
+ "ELSE_STATEMENT",
+ "WHILE_LOOP_LESS",
+ "WHILE_LOOP_GREATER",
+ "FOR_HEADER",
+ "DISPLAY",
+ "ADVANCED_DISPLAY"
+}
+
+loop_statements = {
+ "WHILE_LOOP_LESS",
+ "WHILE_LOOP_GREATER",
+ "FOR_HEADER",
+}
+
+conditional_statements = {
+ "SIMPLE_IF_STATEMENT",
+ "SIMPLE_ELIF_STATEMENT",
+}
+
+indentation_statements = {
+ "WHILE_LOOP_LESS",
+ "WHILE_LOOP_GREATER",
+ "FOR_HEADER",
+ "SIMPLE_IF_STATEMENT",
+ "SIMPLE_ELIF_STATEMENT",
+ "ELSE_STATEMENT"
+}
+
+non_indentation_statements = pattern_vocabulary - indentation_statements
+
+variable_creation_statements = {
+ "INITIALIZATION",
+ "SIMPLE_ASSIGNMENT",
+ "ADVANCED_ASSIGNMENT",
+ "WHILE_LOOP_LESS",
+ "WHILE_LOOP_GREATER",
+ "FOR_HEADER",
+}
+
+pattern_vocab_for_regex = "|".join(pattern_vocabulary)
+
+def generate_code(symbol, assigned_identifiers:list, x:float, for_init_step)->str:
+ """
+ Generate code recursively based on the context-free grammar rules.
+
+    Parameters:
+    - symbol (str): The symbol to generate code for.
+    - assigned_identifiers (list): Variables assigned so far, shared across recursive calls.
+    - x (float): How much more likely writing to an existing variable is than creating a new one.
+    - for_init_step (dict): Scratch dictionary holding a loop's control variable, initial value and step.
+
+ Returns:
+ - str: The generated code.
+ """
+
+ # If the symbol is a non-terminal <--> it's a production rule (PR)
+ if symbol in cfg_rules:
+ # We develop the PR
+ rule = random.choice(cfg_rules[symbol])
+ symbols = rule.split(" ")
+ # We call the generate code function to get the string associated with this PR
+ generated_symbols = [generate_code(s, assigned_identifiers, x, for_init_step) for s in symbols]
+ res_string = ''.join(generated_symbols)
+        # If it's an INITIAL=>DIGIT PR, we record the DIGIT=>0..255 value in the for_init_step dictionary (will be used when calculating the FINAL of the for loop)
+ if symbol == "INITIAL":
+ init = generated_symbols[0]
+ for_init_step["initial_value"] = init
+        # Elif it's a variable-creating PR, we record the generated VARIABLE name in the assigned_identifiers list
+ elif symbol in variable_creation_statements:
+ if symbol == "FOR_HEADER":
+ variable_name = generated_symbols[2]
+ else:
+ variable_name = res_string[0]
+ assigned_identifiers.append(variable_name)
+ elif symbol == "WHILE_CONTROL_INITIALIZATION":
+ for_init_step["initial_var"] = generated_symbols[0]
+ for_init_step["initial_value"] = generated_symbols[4]
+ # Concatenate the generated_sub_codes and return the resulting sub_code
+ return res_string
+
+ # Else the symbol is a (meta-)terminal, a terminal being one that is returned as is (the simplest case), and a meta-terminal must be generated based on past generations
+    # If EXPRESSION_IDENTIFIER (like we find in ASSIGNMENTS, DISPLAYS, and FOR loops), we choose randomly among the previously initialized variables, falling back to a random DIGIT when none exist
+    # NOTE: FOR loops don't require the control variable to be initialized -> this could be a point of generalization
+    if symbol == "EXPRESSION_IDENTIFIER":
+        identifier = random.choice(assigned_identifiers) if assigned_identifiers else random.choice(cfg_rules["DIGIT"])
+        return identifier
+ # If EXPRESSION_IDENTIFIER_WHILE (i.e. "the declaration" of the control variable of the while loop)
+ # NOTE: this one contrary to for loop ... must be one of the existing initialized variables
+ if symbol == "EXPRESSION_IDENTIFIER_WHILE":
+ return for_init_step["initial_var"]
+ # If WHILE_IDENTIFIER (i.e. the "update" of the control variable of the while loop), get it from the for_init_step dictionary (filled by the EXPRESSION_IDENTIFIER_WHILE meta-terminal)
+ if symbol == "WHILE_IDENTIFIER":
+ return for_init_step.get("initial_var", "*")
+    # If the symbol is a FINAL (for the for loop) or FINAL_LESS (for the "while <=" loop), choose a step and a number of executions, compute the FINAL/FINAL_LESS bound using the for_init_step dict, and record the step as it will be needed later to fill the STEP meta-terminal
+ if (symbol == "FINAL") or (symbol == "FINAL_LESS"):
+ initial_value = for_init_step.get("initial_value", "0")
+ # Generate valid step_value and execution_count
+ valid_values = [(1, 2), (2, 1), (2, 2), (2, 3), (3, 2)]
+ step_value, execution_count = random.choice(valid_values)
+ for_init_step["step"] = str(step_value)
+ final_value = step_value * execution_count + int(initial_value) - 1
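+        # Worked example: initial_value = 5 with (step_value, execution_count) = (2, 3) gives
+        # final_value = 2*3 + 5 - 1 = 10, so range(5, 10, 2) yields 5, 7, 9: exactly 3 iterations
+        # ("while i < 10" or "while i <= 10" with i += 2 also iterates exactly 3 times)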
+ return str(final_value)
+ # Same thing as for the one before but this one is only meant for the while loop
+ if symbol == "FINAL_GREATER":
+ initial_value = for_init_step.get("initial_value", "0")
+ # Generate valid step_value and execution_count
+ valid_values = [(1, 2), (2, 1), (2, 2), (2, 3), (3, 2)]
+ step_value, execution_count = random.choice(valid_values)
+ for_init_step["step"] = str(step_value)
+ final_value = int(initial_value) - step_value * execution_count + 1
+ return str(final_value)
+ # If the STEP meta variable, fill it with the for_init_step dict
+ if symbol == "STEP":
+ return for_init_step.get("step", "0")
+
+ # If the symbol is an assigned variable, we try to write to an existing variable instead of creating new ones with a probability "x" times greater
+ if symbol == "A_VARIABLE":
+ # In case there are available readable and writable identifiers
+ if (read_write_vars := list(set(assigned_identifiers) & set(cfg_rules["VARIABLE"]))):
+ alpha = len(assigned_identifiers) / len(cfg_rules["VARIABLE"])
+ p = ((1-alpha)*x - alpha)/((1-alpha)*(1+x))
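+            # One way to read this formula: with alpha the fraction of writable variables already
+            # assigned, the fallback draw below hits an existing variable with probability alpha, so
+            # P(existing) = p + (1-p)*alpha and P(new) = (1-p)*(1-alpha); requiring
+            # P(existing) = x * P(new) and solving for p gives the expression above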
+ # We return an existing read_write_var with the appropriate probability
+ if random.random() < p:
+ return random.choice(read_write_vars)
+ # In case there is no read_write_var or the probability failed
+ return random.choice(cfg_rules["VARIABLE"])
+
+    # If DISPLAY_IDENTIFIER, fill it with a randomly chosen assigned variable, or a random DIGIT if none exists
+ if symbol == "DISPLAY_IDENTIFIER":
+ try:
+ return f"{random.choice(assigned_identifiers)}"
+ except Exception:
+ return random.choice(cfg_rules["DIGIT"])
+    # If none of the above, i.e. it's a terminal (not a meta-terminal)
+ return symbol
+# Regular expressions
+re_pattern_line_parser = re.compile("(\t*)("+pattern_vocab_for_regex+")(:[^,=]+=[^,=]+(?:,[^,=]+=[^,=]+)*$|$)")
+re_general_line_finder = re.compile(".+(?:\n|$)")
+re_while_identifier = re.compile(".*\nwhile ([a-z])")
+
+max_depth = 3
+max_sub_blocks = 3
+
+def distribution_controller(min_init,
+ min_length,
+ max_length,
+ line_counter,
+ context_stack)->dict:
+
+    # If the line_counter is at most min_init we return an INITIALIZATION
+ if line_counter <= min_init:
+ return {"INITIALIZATION": 1.0}
+
+ # Elif it's above max_length
+ if line_counter > max_length:
+        # If we can end the code here, i.e. we aren't at the beginning of an indentation block (for now the while loop is not considered ...)
+ if context_stack[-1]["nb_lines_in_block"] != 0:
+ return {"END":1.0}
+ # Else we return a distribution over the statements which do not require an indentation
+ uniproba = 1/len(non_indentation_statements)
+ return {keyword : uniproba for keyword in non_indentation_statements}
+
+ ## In other cases i.e. min_init < line_counter <= max_length
+
+ # We set the potential keywords
+ potential_keywords = set(pattern_vocabulary)
+
+ # In case we achieved max_depth or max_sub_blocks inside the current context we remove the indentation statements
+ if len(context_stack) - 1 >= max_depth or context_stack[-1]["nb_sub_blocks"] >= max_sub_blocks:
+ potential_keywords.difference_update(indentation_statements)
+
+ # In case we are not in an If statement we remove the elif + else
+ elif not context_stack[-1]["if_statement"]:
+ potential_keywords.difference_update({"SIMPLE_ELIF_STATEMENT", "ELSE_STATEMENT"})
+
+    # We add the END keyword if we are not at the beginning of an indentation block
+ if context_stack[-1]["nb_lines_in_block"] != 0:
+ potential_keywords.add("END")
+
+ # We return a uniform distribution over the remaining keywords
+ uniproba = 1/len(potential_keywords)
+ return {potential_keyword: uniproba for potential_keyword in potential_keywords}
+
+
+def generate_random_code(min_init = 0,
+ min_length = 2,
+ max_length = 15,
+ max_init_count = 3,
+ decay_factor = 0.5,
+ x = 2
+ ):
+
+ # We create the code_lines list, the context_stack and initialize it
+ code_lines = list()
+ context_stack = list()
+ context_stack.append(
+ {
+ "nb_sub_blocks": 0,
+ "if_statement": False,
+ "readable_variables": list(),
+ "writable_variables": list(cfg_rules["VARIABLE"]),
+ "nb_lines_in_block": 0,
+ }
+ )
+
+    # We set the line_counter to 1 and new_pattern_line to the empty string
+ line_counter = 1
+ new_pattern_line = ""
+
+    # While we haven't reached the END keyword
+ while new_pattern_line != "END":
+
+ # We get the distribution from the distribution controller
+ new_distribution = distribution_controller(min_init, min_length, max_length, line_counter, context_stack)
+
+        # We sample a keyword from the distribution (uniform over the allowed statements)
+ new_pattern_line = random.choices(list(new_distribution.keys()), list(new_distribution.values()))[0]
+
+ # We set the "VARIABLES" PR to the current context
+ cfg_rules["VARIABLE"] = context_stack[-1]["writable_variables"]
+
+ # We generate the code using the grammar
+ new_code_line = generate_code(new_pattern_line, context_stack[-1]["readable_variables"], x, dict()).replace("SPACE", " ")
+
+        # We indent every line of the generated snippet to the current depth and append it to code_lines (think about replacing this one with the random expression)
+ code_lines.append("\n".join([(len(context_stack)-1) * "\t" + new_code_line for new_code_line in new_code_line.split("\n")[:-1]])+"\n")
+
+ ## Update the context
+
+ # Update the if statement state of the context
+ if new_pattern_line in conditional_statements:
+ context_stack[-1]["if_statement"] = True
+ else:
+ context_stack[-1]["if_statement"] = False
+
+ # Update the number of sub loops in the context
+ if new_pattern_line in indentation_statements:
+ context_stack[-1]["nb_sub_blocks"] += 1
+
+ # Update the number of code lines in the context
+ lines_to_add = 3 if new_pattern_line in ("WHILE_LOOP_LESS", "WHILE_LOOP_GREATER") else 1
+ context_stack[-1]["nb_lines_in_block"] += lines_to_add
+ line_counter += lines_to_add
+
+ # In case where we have to indent like for the for loop, while loop and conditionals
+ if new_pattern_line in indentation_statements:
+ new_writable_variables = context_stack[-1]["writable_variables"]
+
+ # If the indentation statement is a while loop, we remove the control variable from the writable variables
+ if new_pattern_line in ("WHILE_LOOP_LESS", "WHILE_LOOP_GREATER"):
+ while_control_variable = re_while_identifier.match(new_code_line).group(1)
+ new_writable_variables = list(new_writable_variables)
+ new_writable_variables.remove(while_control_variable)
+
+ # We stack the new indentation level
+ context_stack.append({
+ "nb_sub_blocks": 0,
+ "if_statement": False,
+ "readable_variables": list(context_stack[-1]["readable_variables"]),
+ "writable_variables": new_writable_variables,
+ "nb_lines_in_block": 0,
+ })
+
+ # Else in case where we might un-indent or stay
+ else:
+            # In case we don't stay, i.e. we un-indent: the staying probability decays geometrically
+            # with the number of lines already in the block; on un-indent we pop the stack and add the
+            # block's line count to the enclosing context
+            while len(context_stack)>1 and random.random() > decay_factor ** context_stack[-1]["nb_lines_in_block"]:
+                last_context = context_stack.pop()
+                context_stack[-1]["nb_lines_in_block"] += last_context["nb_lines_in_block"]
+
+ #>> END OF WHILE LOOP: while new_pattern_line != "END"
+
+    # We replace the last code_lines entry (the empty END line) with a display/advanced_display statement so every program ends with a print
+ code_lines[-1] = generate_code(
+ symbol = random.choice(("DISPLAY", "ADVANCED_DISPLAY")),
+ assigned_identifiers = context_stack[0]["readable_variables"],
+ x = x,
+ for_init_step = None
+ ).replace("SPACE", " ")
+
+ # We join the code_lines to obtain the final code
+ code = "".join(code_lines)
+
+ # We set the VARIABLE PR back to its original state
+ cfg_rules["VARIABLE"] = context_stack[0]["writable_variables"]
+
+ return code
+
+
+if __name__ == "__main__":
+
+ parser = argparse.ArgumentParser(description = "Full Random TinyPy Generator")
+
+ parser.add_argument("--random_state", help = "Path to python random state to be loaded if any")
+ parser.add_argument("--nb_programs", default = 100000, help = "Number of programs to be generated")
+ parser.add_argument("--output_file", default = "./data.txt", help = "Number of programs to be generated")
+ parser.add_argument("--timeout", default = 2, help = "Number of seconds to wait for a process to terminate")
+ parser.add_argument("--log_file", default = "./log.txt", help = "The path to the logging file for monitoring progress")
+ parser.add_argument("--log_interval", default = 10000, help = "The number of code snippets generations before logging to the --log_file for monitoring progress")
+ parser.add_argument("--deduplicate", help = "Whether to perform deduplication of generated programs (set to True for true, False for anything else), defaults to True)")
+ parser.add_argument("--max_deduplication_trials", default = 50, help = "The maximum number of consecutive trials when deduplication occurs")
+ parser.add_argument("--programs_separator", default = "", help = "String to put at the top of each code example (Defaults to empty string)")
+ parser.add_argument("--use_tqdm", help = "Whether or not to use tqdm for monitoring progress (set to True for true, False for anything else), defaults to True)")
+
+ args = parser.parse_args()
+ random_state = args.random_state
+ nb_programs = int(args.nb_programs)
+ output_file = args.output_file
+ timeout = int(args.timeout)
+ log_file = args.log_file
+ log_interval = int(args.log_interval)
+    deduplicate = args.deduplicate in ("true", None)
+ max_deduplication_trials = int(args.max_deduplication_trials)
+ programs_separator = args.programs_separator + '\n' if args.programs_separator else ""
+    use_tqdm = args.use_tqdm in ("true", None)
+
+ # Saving or setting the random state
+ if args.random_state is None:
+ random_state = random.getstate()
+ now = datetime.datetime.now()
+ date_hour = now.strftime("%Y-%m-%d_%H-%M")
+ Path("./frcg-random-states").mkdir(parents = True, exist_ok = True)
+ with open(f"./frcg-random-states/random_state_{date_hour}.bin", "wb") as f:
+ pickle.dump(random_state, f)
+ else:
+ try:
+ with open(args.random_state, "rb") as f:
+ random_state = pickle.load(f)
+ random.setstate(random_state)
+ except Exception:
+ random_state = random.getstate()
+ now = datetime.datetime.now()
+ date_hour = now.strftime("%Y-%m-%d_%H-%M")
+ Path(args.random_state).mkdir(parents = True, exist_ok = True)
+ with open(f"{args.random_state}/random_state_{date_hour}.bin", "wb") as f:
+ pickle.dump(random_state, f)
+
+ ## Launching the generation
+ class TimeoutException(Exception):
+ pass
+
+ def timeout_handler(signum, frame):
+ raise TimeoutException()
+
+ signal.signal(signal.SIGALRM, timeout_handler)
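+    # NOTE: signal.SIGALRM is Unix-only, hence the Linux/macOS requirement in the dataset readme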
+
+
+ nb_timeouts = 0
+ nb_zero_divisions = 0
+ nb_generated_programs = 0
+ hashes = set()
+ nb_deduplication_trials = 0
+
+ # Setting the starting_time and first checkpoint time
+ start_time = datetime.datetime.now()
+ checkpoint_time = start_time
+
+ # Opening the logging file and the data output file
+ f_log_file = open(log_file, "w")
+ f = open(output_file, "w")
+
+ # Checking if we use tqdm
+ if use_tqdm:
+ from tqdm import tqdm
+ pbar = tqdm(desc="Generation", total=nb_programs)
+
+ # Launching the loop
+ while nb_generated_programs < nb_programs:
+
+ # Checking if it's log interval
+ if nb_generated_programs % log_interval == 0:
+ now = datetime.datetime.now()
+ f_log_file.write(f"Generated {nb_generated_programs:<{len(str(nb_programs))}} programs, absolute time: {now - start_time}, relative time: {now - checkpoint_time}\n")
+ f_log_file.flush()
+ checkpoint_time = now
+
+ # Generating the code
+ code = generate_random_code()
+
+ # In case of deduplicate
+ if deduplicate:
+ code_hash = hashlib.sha256(code.encode('utf-8')).hexdigest()
+ if code_hash in hashes:
+ nb_deduplication_trials += 1
+ if nb_deduplication_trials == max_deduplication_trials:
+ print("DEDUPLICATE PROBLEM ")
+ break
+ else:
+ continue
+ else:
+ nb_deduplication_trials = 0
+ hashes.add(code_hash)
+
+        # Trying to execute the generated code
+ sio = StringIO()
+ try:
+ with redirect_stdout(sio):
+ signal.alarm(timeout)
+ exec(code, dict())
+
+            # Cancel the alarm so the rest of the try block cannot be interrupted now that exec has finished without raising
+ signal.alarm(0)
+
+ # Saving the code example with its output
+ output = sio.getvalue()
+ result = programs_separator + code + "# output\n# " + "\n# ".join(output.split("\n")[:-1])
+ f.write(result + "\n\n")
+ nb_generated_programs += 1
+
+ # If using tqdm ...
+ if use_tqdm:
+ pbar.update(1)
+
+ except ZeroDivisionError:
+ output = "ZeroDivisionError"
+ nb_zero_divisions += 1
+ except ValueError:
+ nb_timeouts += 1
+ output = "ValueError"
+ except OverflowError:
+ nb_timeouts += 1
+ output = "OverflowError"
+ except TimeoutException as e:
+ nb_timeouts += 1
+ output = "TimeoutError"
+ finally:
+ signal.alarm(0)
+
+ print(f"percentage of timeouts: {nb_timeouts/nb_programs * 100:.2f}%")
+ print(f"percentage of zero divisions: {nb_zero_divisions/nb_programs * 100:.2f}%")
+
+ # Closing the logging and data output files
+ f_log_file.close()
+ f.close()
\ No newline at end of file
diff --git a/datasets/dataset-2/log.txt b/datasets/dataset-2/log.txt
new file mode 100644
index 0000000..74416a3
--- /dev/null
+++ b/datasets/dataset-2/log.txt
@@ -0,0 +1,1298 @@
+Generated 0 programs, absolute time: 0:00:00.002103, relative time: 0:00:00.002103
+Generated 10000 programs, absolute time: 0:00:05.424080, relative time: 0:00:05.421977
+Generated 20000 programs, absolute time: 0:00:10.899208, relative time: 0:00:05.475128
+Generated 30000 programs, absolute time: 0:00:16.423542, relative time: 0:00:05.524334
+Generated 40000 programs, absolute time: 0:00:21.910547, relative time: 0:00:05.487005
+Generated 40000 programs, absolute time: 0:00:21.911530, relative time: 0:00:00.000983
+Generated 50000 programs, absolute time: 0:00:27.409682, relative time: 0:00:05.498152
+Generated 60000 programs, absolute time: 0:00:32.908636, relative time: 0:00:05.498954
+Generated 70000 programs, absolute time: 0:00:40.470451, relative time: 0:00:07.561815
+Generated 80000 programs, absolute time: 0:00:48.008180, relative time: 0:00:07.537729
+Generated 90000 programs, absolute time: 0:00:55.483288, relative time: 0:00:07.475108
+Generated 100000 programs, absolute time: 0:01:00.994529, relative time: 0:00:05.511241
+Generated 100000 programs, absolute time: 0:01:00.994794, relative time: 0:00:00.000265
+Generated 100000 programs, absolute time: 0:01:00.994927, relative time: 0:00:00.000133
+Generated 110000 programs, absolute time: 0:01:06.448307, relative time: 0:00:05.453380
+Generated 120000 programs, absolute time: 0:01:11.975356, relative time: 0:00:05.527049
+Generated 130000 programs, absolute time: 0:01:17.482048, relative time: 0:00:05.506692
+Generated 140000 programs, absolute time: 0:01:22.914352, relative time: 0:00:05.432304
+Generated 150000 programs, absolute time: 0:01:28.359424, relative time: 0:00:05.445072
+Generated 160000 programs, absolute time: 0:01:33.822822, relative time: 0:00:05.463398
+Generated 170000 programs, absolute time: 0:01:39.299188, relative time: 0:00:05.476366
+Generated 180000 programs, absolute time: 0:01:44.812332, relative time: 0:00:05.513144
+Generated 190000 programs, absolute time: 0:01:50.322108, relative time: 0:00:05.509776
+Generated 200000 programs, absolute time: 0:01:55.818877, relative time: 0:00:05.496769
+Generated 210000 programs, absolute time: 0:02:01.288378, relative time: 0:00:05.469501
+Generated 210000 programs, absolute time: 0:02:01.289303, relative time: 0:00:00.000925
+Generated 220000 programs, absolute time: 0:02:06.803420, relative time: 0:00:05.514117
+Generated 230000 programs, absolute time: 0:02:12.326158, relative time: 0:00:05.522738
+Generated 240000 programs, absolute time: 0:02:17.851312, relative time: 0:00:05.525154
+Generated 250000 programs, absolute time: 0:02:23.426926, relative time: 0:00:05.575614
+Generated 260000 programs, absolute time: 0:02:28.901015, relative time: 0:00:05.474089
+Generated 270000 programs, absolute time: 0:02:34.433744, relative time: 0:00:05.532729
+Generated 280000 programs, absolute time: 0:02:40.003129, relative time: 0:00:05.569385
+Generated 290000 programs, absolute time: 0:02:45.485170, relative time: 0:00:05.482041
+Generated 300000 programs, absolute time: 0:02:51.039686, relative time: 0:00:05.554516
+Generated 310000 programs, absolute time: 0:02:56.533903, relative time: 0:00:05.494217
+Generated 320000 programs, absolute time: 0:03:02.076824, relative time: 0:00:05.542921
+Generated 330000 programs, absolute time: 0:03:07.641067, relative time: 0:00:05.564243
+Generated 340000 programs, absolute time: 0:03:13.173179, relative time: 0:00:05.532112
+Generated 350000 programs, absolute time: 0:03:18.641796, relative time: 0:00:05.468617
+Generated 360000 programs, absolute time: 0:03:24.163978, relative time: 0:00:05.522182
+Generated 370000 programs, absolute time: 0:03:29.683413, relative time: 0:00:05.519435
+Generated 380000 programs, absolute time: 0:03:35.248523, relative time: 0:00:05.565110
+Generated 390000 programs, absolute time: 0:03:42.792444, relative time: 0:00:07.543921
+Generated 400000 programs, absolute time: 0:03:48.323767, relative time: 0:00:05.531323
+Generated 410000 programs, absolute time: 0:03:53.826377, relative time: 0:00:05.502610
+Generated 420000 programs, absolute time: 0:03:59.329074, relative time: 0:00:05.502697
+Generated 430000 programs, absolute time: 0:04:04.838983, relative time: 0:00:05.509909
+Generated 440000 programs, absolute time: 0:04:10.343460, relative time: 0:00:05.504477
+Generated 440000 programs, absolute time: 0:04:10.343782, relative time: 0:00:00.000322
+Generated 450000 programs, absolute time: 0:04:15.850084, relative time: 0:00:05.506302
+Generated 450000 programs, absolute time: 0:04:15.850972, relative time: 0:00:00.000888
+Generated 460000 programs, absolute time: 0:04:21.347730, relative time: 0:00:05.496758
+Generated 470000 programs, absolute time: 0:04:26.868076, relative time: 0:00:05.520346
+Generated 480000 programs, absolute time: 0:04:32.347746, relative time: 0:00:05.479670
+Generated 490000 programs, absolute time: 0:04:37.854649, relative time: 0:00:05.506903
+Generated 500000 programs, absolute time: 0:04:43.393034, relative time: 0:00:05.538385
+Generated 510000 programs, absolute time: 0:04:50.936403, relative time: 0:00:07.543369
+Generated 520000 programs, absolute time: 0:04:56.439178, relative time: 0:00:05.502775
+Generated 530000 programs, absolute time: 0:05:01.972432, relative time: 0:00:05.533254
+Generated 540000 programs, absolute time: 0:05:07.473782, relative time: 0:00:05.501350
+Generated 550000 programs, absolute time: 0:05:12.989377, relative time: 0:00:05.515595
+Generated 560000 programs, absolute time: 0:05:18.505868, relative time: 0:00:05.516491
+Generated 570000 programs, absolute time: 0:05:24.017287, relative time: 0:00:05.511419
+Generated 580000 programs, absolute time: 0:05:29.589166, relative time: 0:00:05.571879
+Generated 590000 programs, absolute time: 0:05:35.148119, relative time: 0:00:05.558953
+Generated 600000 programs, absolute time: 0:05:40.681717, relative time: 0:00:05.533598
+Generated 610000 programs, absolute time: 0:05:50.216270, relative time: 0:00:09.534553
+Generated 620000 programs, absolute time: 0:05:55.749222, relative time: 0:00:05.532952
+Generated 630000 programs, absolute time: 0:06:01.285849, relative time: 0:00:05.536627
+Generated 640000 programs, absolute time: 0:06:06.784973, relative time: 0:00:05.499124
+Generated 650000 programs, absolute time: 0:06:12.333942, relative time: 0:00:05.548969
+Generated 650000 programs, absolute time: 0:06:12.334314, relative time: 0:00:00.000372
+Generated 660000 programs, absolute time: 0:06:17.885906, relative time: 0:00:05.551592
+Generated 670000 programs, absolute time: 0:06:24.521868, relative time: 0:00:06.635962
+Generated 670000 programs, absolute time: 0:06:24.522788, relative time: 0:00:00.000920
+Generated 680000 programs, absolute time: 0:06:31.338176, relative time: 0:00:06.815388
+Generated 690000 programs, absolute time: 0:06:36.927238, relative time: 0:00:05.589062
+Generated 700000 programs, absolute time: 0:06:42.472679, relative time: 0:00:05.545441
+Generated 710000 programs, absolute time: 0:06:48.018585, relative time: 0:00:05.545906
+Generated 720000 programs, absolute time: 0:06:53.605366, relative time: 0:00:05.586781
+Generated 720000 programs, absolute time: 0:06:53.606212, relative time: 0:00:00.000846
+Generated 730000 programs, absolute time: 0:06:59.197136, relative time: 0:00:05.590924
+Generated 740000 programs, absolute time: 0:07:04.797430, relative time: 0:00:05.600294
+Generated 750000 programs, absolute time: 0:07:10.374956, relative time: 0:00:05.577526
+Generated 760000 programs, absolute time: 0:07:15.940392, relative time: 0:00:05.565436
+Generated 770000 programs, absolute time: 0:07:21.477307, relative time: 0:00:05.536915
+Generated 780000 programs, absolute time: 0:07:27.083577, relative time: 0:00:05.606270
+Generated 790000 programs, absolute time: 0:07:32.653854, relative time: 0:00:05.570277
+Generated 800000 programs, absolute time: 0:07:38.155952, relative time: 0:00:05.502098
+Generated 810000 programs, absolute time: 0:07:43.749586, relative time: 0:00:05.593634
+Generated 820000 programs, absolute time: 0:07:49.314764, relative time: 0:00:05.565178
+Generated 830000 programs, absolute time: 0:07:54.868314, relative time: 0:00:05.553550
+Generated 840000 programs, absolute time: 0:08:00.472295, relative time: 0:00:05.603981
+Generated 850000 programs, absolute time: 0:08:06.028600, relative time: 0:00:05.556305
+Generated 860000 programs, absolute time: 0:08:11.569128, relative time: 0:00:05.540528
+Generated 870000 programs, absolute time: 0:08:17.123066, relative time: 0:00:05.553938
+Generated 880000 programs, absolute time: 0:08:22.700043, relative time: 0:00:05.576977
+Generated 890000 programs, absolute time: 0:08:28.317686, relative time: 0:00:05.617643
+Generated 900000 programs, absolute time: 0:08:33.893448, relative time: 0:00:05.575762
+Generated 910000 programs, absolute time: 0:08:39.481015, relative time: 0:00:05.587567
+Generated 920000 programs, absolute time: 0:08:45.097942, relative time: 0:00:05.616927
+Generated 920000 programs, absolute time: 0:08:45.098518, relative time: 0:00:00.000576
+Generated 930000 programs, absolute time: 0:08:52.691832, relative time: 0:00:07.593314
+Generated 940000 programs, absolute time: 0:08:58.270982, relative time: 0:00:05.579150
+Generated 950000 programs, absolute time: 0:09:03.893586, relative time: 0:00:05.622604
+Generated 960000 programs, absolute time: 0:09:09.536934, relative time: 0:00:05.643348
+Generated 970000 programs, absolute time: 0:09:15.123368, relative time: 0:00:05.586434
+Generated 980000 programs, absolute time: 0:09:20.700893, relative time: 0:00:05.577525
+Generated 990000 programs, absolute time: 0:09:30.303305, relative time: 0:00:09.602412
+Generated 1000000 programs, absolute time: 0:09:36.028770, relative time: 0:00:05.725465
+Generated 1010000 programs, absolute time: 0:09:41.691181, relative time: 0:00:05.662411
+Generated 1020000 programs, absolute time: 0:09:47.304779, relative time: 0:00:05.613598
+Generated 1030000 programs, absolute time: 0:09:52.889500, relative time: 0:00:05.584721
+Generated 1040000 programs, absolute time: 0:09:58.485285, relative time: 0:00:05.595785
+Generated 1050000 programs, absolute time: 0:10:04.096233, relative time: 0:00:05.610948
+Generated 1060000 programs, absolute time: 0:10:09.713636, relative time: 0:00:05.617403
+Generated 1070000 programs, absolute time: 0:10:15.312273, relative time: 0:00:05.598637
+Generated 1080000 programs, absolute time: 0:10:20.923201, relative time: 0:00:05.610928
+Generated 1090000 programs, absolute time: 0:10:26.569510, relative time: 0:00:05.646309
+Generated 1100000 programs, absolute time: 0:10:32.125107, relative time: 0:00:05.555597
+Generated 1110000 programs, absolute time: 0:10:37.761354, relative time: 0:00:05.636247
+Generated 1120000 programs, absolute time: 0:10:43.342447, relative time: 0:00:05.581093
+Generated 1130000 programs, absolute time: 0:10:48.958076, relative time: 0:00:05.615629
+Generated 1140000 programs, absolute time: 0:10:54.563550, relative time: 0:00:05.605474
+Generated 1150000 programs, absolute time: 0:11:00.156154, relative time: 0:00:05.592604
+Generated 1160000 programs, absolute time: 0:11:05.732996, relative time: 0:00:05.576842
+Generated 1170000 programs, absolute time: 0:11:11.347273, relative time: 0:00:05.614277
+Generated 1180000 programs, absolute time: 0:11:16.928858, relative time: 0:00:05.581585
+Generated 1190000 programs, absolute time: 0:11:22.528230, relative time: 0:00:05.599372
+Generated 1200000 programs, absolute time: 0:11:28.173455, relative time: 0:00:05.645225
+Generated 1210000 programs, absolute time: 0:11:33.780384, relative time: 0:00:05.606929
+Generated 1220000 programs, absolute time: 0:11:39.420057, relative time: 0:00:05.639673
+Generated 1230000 programs, absolute time: 0:11:45.002059, relative time: 0:00:05.582002
+Generated 1240000 programs, absolute time: 0:11:51.695698, relative time: 0:00:06.693639
+Generated 1250000 programs, absolute time: 0:11:57.316782, relative time: 0:00:05.621084
+Generated 1260000 programs, absolute time: 0:12:02.890657, relative time: 0:00:05.573875
+Generated 1270000 programs, absolute time: 0:12:08.530980, relative time: 0:00:05.640323
+Generated 1280000 programs, absolute time: 0:12:14.142054, relative time: 0:00:05.611074
+Generated 1290000 programs, absolute time: 0:12:21.744903, relative time: 0:00:07.602849
+Generated 1300000 programs, absolute time: 0:12:27.401202, relative time: 0:00:05.656299
+Generated 1310000 programs, absolute time: 0:12:33.008692, relative time: 0:00:05.607490
+Generated 1320000 programs, absolute time: 0:12:38.618658, relative time: 0:00:05.609966
+Generated 1330000 programs, absolute time: 0:12:44.267520, relative time: 0:00:05.648862
+Generated 1340000 programs, absolute time: 0:12:49.833604, relative time: 0:00:05.566084
+Generated 1350000 programs, absolute time: 0:12:55.494575, relative time: 0:00:05.660971
+Generated 1360000 programs, absolute time: 0:13:01.142778, relative time: 0:00:05.648203
+Generated 1360000 programs, absolute time: 0:13:01.143493, relative time: 0:00:00.000715
+Generated 1370000 programs, absolute time: 0:13:06.818969, relative time: 0:00:05.675476
+Generated 1380000 programs, absolute time: 0:13:14.423041, relative time: 0:00:07.604072
+Generated 1380000 programs, absolute time: 0:13:14.423966, relative time: 0:00:00.000925
+Generated 1390000 programs, absolute time: 0:13:20.040543, relative time: 0:00:05.616577
+Generated 1400000 programs, absolute time: 0:13:25.690782, relative time: 0:00:05.650239
+Generated 1410000 programs, absolute time: 0:13:31.303168, relative time: 0:00:05.612386
+Generated 1410000 programs, absolute time: 0:13:31.303591, relative time: 0:00:00.000423
+Generated 1420000 programs, absolute time: 0:13:36.924958, relative time: 0:00:05.621367
+Generated 1430000 programs, absolute time: 0:13:42.517561, relative time: 0:00:05.592603
+Generated 1440000 programs, absolute time: 0:13:48.146187, relative time: 0:00:05.628626
+Generated 1450000 programs, absolute time: 0:13:53.762201, relative time: 0:00:05.616014
+Generated 1460000 programs, absolute time: 0:13:59.418540, relative time: 0:00:05.656339
+Generated 1470000 programs, absolute time: 0:14:05.079059, relative time: 0:00:05.660519
+Generated 1480000 programs, absolute time: 0:14:10.697113, relative time: 0:00:05.618054
+Generated 1490000 programs, absolute time: 0:14:16.282757, relative time: 0:00:05.585644
+Generated 1500000 programs, absolute time: 0:14:21.893172, relative time: 0:00:05.610415
+Generated 1510000 programs, absolute time: 0:14:27.499743, relative time: 0:00:05.606571
+Generated 1520000 programs, absolute time: 0:14:33.166338, relative time: 0:00:05.666595
+Generated 1530000 programs, absolute time: 0:14:38.781397, relative time: 0:00:05.615059
+Generated 1540000 programs, absolute time: 0:14:44.380041, relative time: 0:00:05.598644
+Generated 1550000 programs, absolute time: 0:14:49.978035, relative time: 0:00:05.597994
+Generated 1560000 programs, absolute time: 0:14:55.592167, relative time: 0:00:05.614132
+Generated 1570000 programs, absolute time: 0:15:01.209050, relative time: 0:00:05.616883
+Generated 1580000 programs, absolute time: 0:15:06.806714, relative time: 0:00:05.597664
+Generated 1590000 programs, absolute time: 0:15:12.454209, relative time: 0:00:05.647495
+Generated 1600000 programs, absolute time: 0:15:18.135068, relative time: 0:00:05.680859
+Generated 1600000 programs, absolute time: 0:15:18.135483, relative time: 0:00:00.000415
+Generated 1610000 programs, absolute time: 0:15:23.759960, relative time: 0:00:05.624477
+Generated 1620000 programs, absolute time: 0:15:29.330429, relative time: 0:00:05.570469
+Generated 1620000 programs, absolute time: 0:15:29.331117, relative time: 0:00:00.000688
+Generated 1620000 programs, absolute time: 0:15:29.331847, relative time: 0:00:00.000730
+Generated 1630000 programs, absolute time: 0:15:34.949059, relative time: 0:00:05.617212
+Generated 1630000 programs, absolute time: 0:15:34.949223, relative time: 0:00:00.000164
+Generated 1630000 programs, absolute time: 0:15:34.949313, relative time: 0:00:00.000090
+Generated 1640000 programs, absolute time: 0:15:40.625484, relative time: 0:00:05.676171
+Generated 1650000 programs, absolute time: 0:15:46.203339, relative time: 0:00:05.577855
+Generated 1660000 programs, absolute time: 0:15:53.786008, relative time: 0:00:07.582669
+Generated 1670000 programs, absolute time: 0:15:59.398325, relative time: 0:00:05.612317
+Generated 1680000 programs, absolute time: 0:16:07.012661, relative time: 0:00:07.614336
+Generated 1690000 programs, absolute time: 0:16:12.606826, relative time: 0:00:05.594165
+Generated 1700000 programs, absolute time: 0:16:18.229358, relative time: 0:00:05.622532
+Generated 1710000 programs, absolute time: 0:16:23.831739, relative time: 0:00:05.602381
+Generated 1720000 programs, absolute time: 0:16:29.419918, relative time: 0:00:05.588179
+Generated 1730000 programs, absolute time: 0:16:34.999030, relative time: 0:00:05.579112
+Generated 1740000 programs, absolute time: 0:16:40.593800, relative time: 0:00:05.594770
+Generated 1750000 programs, absolute time: 0:16:46.143237, relative time: 0:00:05.549437
+Generated 1760000 programs, absolute time: 0:16:51.701601, relative time: 0:00:05.558364
+Generated 1770000 programs, absolute time: 0:16:57.322710, relative time: 0:00:05.621109
+Generated 1780000 programs, absolute time: 0:17:02.888157, relative time: 0:00:05.565447
+Generated 1790000 programs, absolute time: 0:17:08.491641, relative time: 0:00:05.603484
+Generated 1800000 programs, absolute time: 0:17:14.092209, relative time: 0:00:05.600568
+Generated 1810000 programs, absolute time: 0:17:19.681253, relative time: 0:00:05.589044
+Generated 1810000 programs, absolute time: 0:17:19.682002, relative time: 0:00:00.000749
+Generated 1820000 programs, absolute time: 0:17:25.322462, relative time: 0:00:05.640460
+Generated 1830000 programs, absolute time: 0:17:30.866937, relative time: 0:00:05.544475
+Generated 1840000 programs, absolute time: 0:17:36.479925, relative time: 0:00:05.612988
+Generated 1850000 programs, absolute time: 0:17:42.032832, relative time: 0:00:05.552907
+Generated 1860000 programs, absolute time: 0:17:47.581608, relative time: 0:00:05.548776
+Generated 1870000 programs, absolute time: 0:17:53.197070, relative time: 0:00:05.615462
+Generated 1870000 programs, absolute time: 0:17:53.197893, relative time: 0:00:00.000823
+Generated 1880000 programs, absolute time: 0:17:58.840427, relative time: 0:00:05.642534
+Generated 1890000 programs, absolute time: 0:18:04.487790, relative time: 0:00:05.647363
+Generated 1900000 programs, absolute time: 0:18:10.125536, relative time: 0:00:05.637746
+Generated 1910000 programs, absolute time: 0:18:15.817001, relative time: 0:00:05.691465
+Generated 1920000 programs, absolute time: 0:18:21.491309, relative time: 0:00:05.674308
+Generated 1930000 programs, absolute time: 0:18:27.172000, relative time: 0:00:05.680691
+Generated 1940000 programs, absolute time: 0:18:32.760246, relative time: 0:00:05.588246
+Generated 1950000 programs, absolute time: 0:18:38.408569, relative time: 0:00:05.648323
+Generated 1960000 programs, absolute time: 0:18:44.040419, relative time: 0:00:05.631850
+Generated 1970000 programs, absolute time: 0:18:49.628207, relative time: 0:00:05.587788
+Generated 1980000 programs, absolute time: 0:18:55.278436, relative time: 0:00:05.650229
+Generated 1990000 programs, absolute time: 0:19:00.922327, relative time: 0:00:05.643891
+Generated 2000000 programs, absolute time: 0:19:06.521026, relative time: 0:00:05.598699
+Generated 2010000 programs, absolute time: 0:19:12.132890, relative time: 0:00:05.611864
+Generated 2020000 programs, absolute time: 0:19:17.756942, relative time: 0:00:05.624052
+Generated 2030000 programs, absolute time: 0:19:23.398313, relative time: 0:00:05.641371
+Generated 2040000 programs, absolute time: 0:19:31.030570, relative time: 0:00:07.632257
+Generated 2050000 programs, absolute time: 0:19:36.631836, relative time: 0:00:05.601266
+Generated 2060000 programs, absolute time: 0:19:42.237657, relative time: 0:00:05.605821
+Generated 2070000 programs, absolute time: 0:19:47.897177, relative time: 0:00:05.659520
+Generated 2080000 programs, absolute time: 0:19:55.493514, relative time: 0:00:07.596337
+Generated 2090000 programs, absolute time: 0:20:01.068499, relative time: 0:00:05.574985
+Generated 2100000 programs, absolute time: 0:20:06.712760, relative time: 0:00:05.644261
+Generated 2110000 programs, absolute time: 0:20:13.414286, relative time: 0:00:06.701526
+Generated 2120000 programs, absolute time: 0:20:19.089900, relative time: 0:00:05.675614
+Generated 2130000 programs, absolute time: 0:20:24.696276, relative time: 0:00:05.606376
+Generated 2140000 programs, absolute time: 0:20:30.337017, relative time: 0:00:05.640741
+Generated 2140000 programs, absolute time: 0:20:30.337670, relative time: 0:00:00.000653
+Generated 2150000 programs, absolute time: 0:20:35.942276, relative time: 0:00:05.604606
+Generated 2160000 programs, absolute time: 0:20:41.548222, relative time: 0:00:05.605946
+Generated 2170000 programs, absolute time: 0:20:47.184939, relative time: 0:00:05.636717
+Generated 2180000 programs, absolute time: 0:20:52.801345, relative time: 0:00:05.616406
+Generated 2190000 programs, absolute time: 0:20:58.395116, relative time: 0:00:05.593771
+Generated 2200000 programs, absolute time: 0:21:03.995616, relative time: 0:00:05.600500
+Generated 2210000 programs, absolute time: 0:21:09.660437, relative time: 0:00:05.664821
+Generated 2220000 programs, absolute time: 0:21:15.306371, relative time: 0:00:05.645934
+Generated 2230000 programs, absolute time: 0:21:20.895379, relative time: 0:00:05.589008
+Generated 2240000 programs, absolute time: 0:21:26.529754, relative time: 0:00:05.634375
+Generated 2250000 programs, absolute time: 0:21:32.160250, relative time: 0:00:05.630496
+Generated 2260000 programs, absolute time: 0:21:37.818015, relative time: 0:00:05.657765
+Generated 2260000 programs, absolute time: 0:21:37.818637, relative time: 0:00:00.000622
+Generated 2270000 programs, absolute time: 0:21:43.430761, relative time: 0:00:05.612124
+Generated 2280000 programs, absolute time: 0:21:49.067090, relative time: 0:00:05.636329
+Generated 2290000 programs, absolute time: 0:21:54.692325, relative time: 0:00:05.625235
+Generated 2300000 programs, absolute time: 0:22:00.338224, relative time: 0:00:05.645899
+Generated 2310000 programs, absolute time: 0:22:05.955137, relative time: 0:00:05.616913
+Generated 2320000 programs, absolute time: 0:22:11.565106, relative time: 0:00:05.609969
+Generated 2330000 programs, absolute time: 0:22:17.175306, relative time: 0:00:05.610200
+Generated 2340000 programs, absolute time: 0:22:22.813489, relative time: 0:00:05.638183
+Generated 2350000 programs, absolute time: 0:22:28.464999, relative time: 0:00:05.651510
+Generated 2360000 programs, absolute time: 0:22:34.034728, relative time: 0:00:05.569729
+Generated 2370000 programs, absolute time: 0:22:39.633421, relative time: 0:00:05.598693
+Generated 2380000 programs, absolute time: 0:22:45.238578, relative time: 0:00:05.605157
+Generated 2390000 programs, absolute time: 0:22:52.929067, relative time: 0:00:07.690489
+Generated 2390000 programs, absolute time: 0:22:52.929346, relative time: 0:00:00.000279
+Generated 2400000 programs, absolute time: 0:22:58.523950, relative time: 0:00:05.594604
+Generated 2400000 programs, absolute time: 0:22:58.524074, relative time: 0:00:00.000124
+Generated 2410000 programs, absolute time: 0:23:04.158412, relative time: 0:00:05.634338
+Generated 2420000 programs, absolute time: 0:23:09.778398, relative time: 0:00:05.619986
+Generated 2430000 programs, absolute time: 0:23:15.382555, relative time: 0:00:05.604157
+Generated 2440000 programs, absolute time: 0:23:21.011792, relative time: 0:00:05.629237
+Generated 2450000 programs, absolute time: 0:23:26.616361, relative time: 0:00:05.604569
+Generated 2460000 programs, absolute time: 0:23:32.235450, relative time: 0:00:05.619089
+Generated 2470000 programs, absolute time: 0:23:37.788914, relative time: 0:00:05.553464
+Generated 2480000 programs, absolute time: 0:23:43.393973, relative time: 0:00:05.605059
+Generated 2490000 programs, absolute time: 0:23:48.935990, relative time: 0:00:05.542017
+Generated 2500000 programs, absolute time: 0:23:54.519377, relative time: 0:00:05.583387
+Generated 2510000 programs, absolute time: 0:24:00.077864, relative time: 0:00:05.558487
+Generated 2520000 programs, absolute time: 0:24:07.632796, relative time: 0:00:07.554932
+Generated 2530000 programs, absolute time: 0:24:13.179328, relative time: 0:00:05.546532
+Generated 2540000 programs, absolute time: 0:24:18.735270, relative time: 0:00:05.555942
+Generated 2550000 programs, absolute time: 0:24:24.307691, relative time: 0:00:05.572421
+Generated 2560000 programs, absolute time: 0:24:29.897081, relative time: 0:00:05.589390
+Generated 2570000 programs, absolute time: 0:24:35.483046, relative time: 0:00:05.585965
+Generated 2580000 programs, absolute time: 0:24:41.004206, relative time: 0:00:05.521160
+Generated 2590000 programs, absolute time: 0:24:46.543876, relative time: 0:00:05.539670
+Generated 2590000 programs, absolute time: 0:24:46.544519, relative time: 0:00:00.000643
+Generated 2600000 programs, absolute time: 0:24:52.114908, relative time: 0:00:05.570389
+Generated 2610000 programs, absolute time: 0:24:57.698255, relative time: 0:00:05.583347
+Generated 2620000 programs, absolute time: 0:25:03.280456, relative time: 0:00:05.582201
+Generated 2620000 programs, absolute time: 0:25:03.281032, relative time: 0:00:00.000576
+Generated 2630000 programs, absolute time: 0:25:08.874262, relative time: 0:00:05.593230
+Generated 2640000 programs, absolute time: 0:25:14.462496, relative time: 0:00:05.588234
+Generated 2650000 programs, absolute time: 0:25:19.994272, relative time: 0:00:05.531776
+Generated 2660000 programs, absolute time: 0:25:25.537033, relative time: 0:00:05.542761
+Generated 2670000 programs, absolute time: 0:25:31.092819, relative time: 0:00:05.555786
+Generated 2680000 programs, absolute time: 0:25:36.685085, relative time: 0:00:05.592266
+Generated 2690000 programs, absolute time: 0:25:42.279400, relative time: 0:00:05.594315
+Generated 2700000 programs, absolute time: 0:25:49.885387, relative time: 0:00:07.605987
+Generated 2710000 programs, absolute time: 0:25:55.444620, relative time: 0:00:05.559233
+Generated 2720000 programs, absolute time: 0:26:00.988799, relative time: 0:00:05.544179
+Generated 2730000 programs, absolute time: 0:26:06.572726, relative time: 0:00:05.583927
+Generated 2740000 programs, absolute time: 0:26:12.211728, relative time: 0:00:05.639002
+Generated 2750000 programs, absolute time: 0:26:17.818682, relative time: 0:00:05.606954
+Generated 2760000 programs, absolute time: 0:26:23.354510, relative time: 0:00:05.535828
+Generated 2760000 programs, absolute time: 0:26:23.355306, relative time: 0:00:00.000796
+Generated 2770000 programs, absolute time: 0:26:28.931040, relative time: 0:00:05.575734
+Generated 2780000 programs, absolute time: 0:26:34.469081, relative time: 0:00:05.538041
+Generated 2790000 programs, absolute time: 0:26:39.988783, relative time: 0:00:05.519702
+Generated 2800000 programs, absolute time: 0:26:45.611962, relative time: 0:00:05.623179
+Generated 2810000 programs, absolute time: 0:26:51.189772, relative time: 0:00:05.577810
+Generated 2820000 programs, absolute time: 0:26:56.703552, relative time: 0:00:05.513780
+Generated 2830000 programs, absolute time: 0:27:02.249061, relative time: 0:00:05.545509
+Generated 2840000 programs, absolute time: 0:27:07.811872, relative time: 0:00:05.562811
+Generated 2850000 programs, absolute time: 0:27:13.339574, relative time: 0:00:05.527702
+Generated 2860000 programs, absolute time: 0:27:18.906371, relative time: 0:00:05.566797
+Generated 2870000 programs, absolute time: 0:27:24.426186, relative time: 0:00:05.519815
+Generated 2880000 programs, absolute time: 0:27:29.996124, relative time: 0:00:05.569938
+Generated 2890000 programs, absolute time: 0:27:35.601052, relative time: 0:00:05.604928
+Generated 2900000 programs, absolute time: 0:27:41.178712, relative time: 0:00:05.577660
+Generated 2910000 programs, absolute time: 0:27:46.712037, relative time: 0:00:05.533325
+Generated 2920000 programs, absolute time: 0:27:52.295915, relative time: 0:00:05.583878
+Generated 2930000 programs, absolute time: 0:27:57.851530, relative time: 0:00:05.555615
+Generated 2940000 programs, absolute time: 0:28:03.471563, relative time: 0:00:05.620033
+Generated 2950000 programs, absolute time: 0:28:09.117410, relative time: 0:00:05.645847
+Generated 2960000 programs, absolute time: 0:28:14.678455, relative time: 0:00:05.561045
+Generated 2970000 programs, absolute time: 0:28:20.269598, relative time: 0:00:05.591143
+Generated 2980000 programs, absolute time: 0:28:25.903997, relative time: 0:00:05.634399
+Generated 2990000 programs, absolute time: 0:28:33.474799, relative time: 0:00:07.570802
+Generated 2990000 programs, absolute time: 0:28:33.475472, relative time: 0:00:00.000673
+Generated 3000000 programs, absolute time: 0:28:39.051709, relative time: 0:00:05.576237
+Generated 3010000 programs, absolute time: 0:28:44.615912, relative time: 0:00:05.564203
+Generated 3020000 programs, absolute time: 0:28:50.198375, relative time: 0:00:05.582463
+Generated 3030000 programs, absolute time: 0:28:55.752108, relative time: 0:00:05.553733
+Generated 3040000 programs, absolute time: 0:29:01.314979, relative time: 0:00:05.562871
+Generated 3050000 programs, absolute time: 0:29:06.890651, relative time: 0:00:05.575672
+Generated 3060000 programs, absolute time: 0:29:12.470522, relative time: 0:00:05.579871
+Generated 3070000 programs, absolute time: 0:29:18.012247, relative time: 0:00:05.541725
+Generated 3080000 programs, absolute time: 0:29:23.653470, relative time: 0:00:05.641223
+Generated 3090000 programs, absolute time: 0:29:29.240814, relative time: 0:00:05.587344
+Generated 3100000 programs, absolute time: 0:29:36.800117, relative time: 0:00:07.559303
+Generated 3110000 programs, absolute time: 0:29:43.319293, relative time: 0:00:06.519176
+Generated 3120000 programs, absolute time: 0:29:48.869809, relative time: 0:00:05.550516
+Generated 3130000 programs, absolute time: 0:29:54.446559, relative time: 0:00:05.576750
+Generated 3140000 programs, absolute time: 0:30:00.010419, relative time: 0:00:05.563860
+Generated 3150000 programs, absolute time: 0:30:05.587635, relative time: 0:00:05.577216
+Generated 3160000 programs, absolute time: 0:30:11.117103, relative time: 0:00:05.529468
+Generated 3170000 programs, absolute time: 0:30:16.688613, relative time: 0:00:05.571510
+Generated 3180000 programs, absolute time: 0:30:22.318173, relative time: 0:00:05.629560
+Generated 3190000 programs, absolute time: 0:30:27.828800, relative time: 0:00:05.510627
+Generated 3200000 programs, absolute time: 0:30:33.411735, relative time: 0:00:05.582935
+Generated 3210000 programs, absolute time: 0:30:40.997891, relative time: 0:00:07.586156
+Generated 3220000 programs, absolute time: 0:30:46.539428, relative time: 0:00:05.541537
+Generated 3230000 programs, absolute time: 0:30:52.141724, relative time: 0:00:05.602296
+Generated 3240000 programs, absolute time: 0:30:57.703039, relative time: 0:00:05.561315
+Generated 3250000 programs, absolute time: 0:31:03.262961, relative time: 0:00:05.559922
+Generated 3260000 programs, absolute time: 0:31:08.930522, relative time: 0:00:05.667561
+Generated 3270000 programs, absolute time: 0:31:14.489208, relative time: 0:00:05.558686
+Generated 3280000 programs, absolute time: 0:31:20.054577, relative time: 0:00:05.565369
+Generated 3290000 programs, absolute time: 0:31:25.616350, relative time: 0:00:05.561773
+Generated 3300000 programs, absolute time: 0:31:31.404374, relative time: 0:00:05.788024
+Generated 3310000 programs, absolute time: 0:31:36.988513, relative time: 0:00:05.584139
+Generated 3320000 programs, absolute time: 0:31:42.577657, relative time: 0:00:05.589144
+Generated 3330000 programs, absolute time: 0:31:48.177299, relative time: 0:00:05.599642
+Generated 3340000 programs, absolute time: 0:31:55.739540, relative time: 0:00:07.562241
+Generated 3350000 programs, absolute time: 0:32:01.527271, relative time: 0:00:05.787731
+Generated 3360000 programs, absolute time: 0:32:07.103815, relative time: 0:00:05.576544
+Generated 3370000 programs, absolute time: 0:32:12.646470, relative time: 0:00:05.542655
+Generated 3380000 programs, absolute time: 0:32:18.237709, relative time: 0:00:05.591239
+Generated 3390000 programs, absolute time: 0:32:23.846876, relative time: 0:00:05.609167
+Generated 3400000 programs, absolute time: 0:32:29.470192, relative time: 0:00:05.623316
+Generated 3410000 programs, absolute time: 0:32:35.026249, relative time: 0:00:05.556057
+Generated 3420000 programs, absolute time: 0:32:40.626838, relative time: 0:00:05.600589
+Generated 3420000 programs, absolute time: 0:32:40.627418, relative time: 0:00:00.000580
+Generated 3430000 programs, absolute time: 0:32:46.209049, relative time: 0:00:05.581631
+Generated 3430000 programs, absolute time: 0:32:46.209199, relative time: 0:00:00.000150
+Generated 3440000 programs, absolute time: 0:32:51.719029, relative time: 0:00:05.509830
+Generated 3450000 programs, absolute time: 0:32:57.307110, relative time: 0:00:05.588081
+Generated 3460000 programs, absolute time: 0:33:02.856287, relative time: 0:00:05.549177
+Generated 3470000 programs, absolute time: 0:33:08.463430, relative time: 0:00:05.607143
+Generated 3480000 programs, absolute time: 0:33:14.050421, relative time: 0:00:05.586991
+Generated 3490000 programs, absolute time: 0:33:19.614110, relative time: 0:00:05.563689
+Generated 3500000 programs, absolute time: 0:33:25.148394, relative time: 0:00:05.534284
+Generated 3510000 programs, absolute time: 0:33:30.673706, relative time: 0:00:05.525312
+Generated 3520000 programs, absolute time: 0:33:38.241031, relative time: 0:00:07.567325
+Generated 3530000 programs, absolute time: 0:33:43.871264, relative time: 0:00:05.630233
+Generated 3540000 programs, absolute time: 0:33:49.745861, relative time: 0:00:05.874597
+Generated 3550000 programs, absolute time: 0:33:55.331226, relative time: 0:00:05.585365
+Generated 3560000 programs, absolute time: 0:34:00.890516, relative time: 0:00:05.559290
+Generated 3570000 programs, absolute time: 0:34:06.465889, relative time: 0:00:05.575373
+Generated 3580000 programs, absolute time: 0:34:12.032295, relative time: 0:00:05.566406
+Generated 3590000 programs, absolute time: 0:34:17.613334, relative time: 0:00:05.581039
+Generated 3600000 programs, absolute time: 0:34:25.174034, relative time: 0:00:07.560700
+Generated 3610000 programs, absolute time: 0:34:30.773266, relative time: 0:00:05.599232
+Generated 3620000 programs, absolute time: 0:34:36.317518, relative time: 0:00:05.544252
+Generated 3620000 programs, absolute time: 0:34:36.318499, relative time: 0:00:00.000981
+Generated 3630000 programs, absolute time: 0:34:41.861531, relative time: 0:00:05.543032
+Generated 3640000 programs, absolute time: 0:34:48.047882, relative time: 0:00:06.186351
+Generated 3650000 programs, absolute time: 0:34:53.643920, relative time: 0:00:05.596038
+Generated 3660000 programs, absolute time: 0:34:59.208000, relative time: 0:00:05.564080
+Generated 3670000 programs, absolute time: 0:35:04.792749, relative time: 0:00:05.584749
+Generated 3680000 programs, absolute time: 0:35:10.376883, relative time: 0:00:05.584134
+Generated 3690000 programs, absolute time: 0:35:15.899126, relative time: 0:00:05.522243
+Generated 3700000 programs, absolute time: 0:35:21.465395, relative time: 0:00:05.566269
+Generated 3710000 programs, absolute time: 0:35:27.062476, relative time: 0:00:05.597081
+Generated 3720000 programs, absolute time: 0:35:32.632103, relative time: 0:00:05.569627
+Generated 3730000 programs, absolute time: 0:35:38.202964, relative time: 0:00:05.570861
+Generated 3740000 programs, absolute time: 0:35:43.772143, relative time: 0:00:05.569179
+Generated 3750000 programs, absolute time: 0:35:49.343454, relative time: 0:00:05.571311
+Generated 3760000 programs, absolute time: 0:35:54.932806, relative time: 0:00:05.589352
+Generated 3770000 programs, absolute time: 0:36:00.513014, relative time: 0:00:05.580208
+Generated 3780000 programs, absolute time: 0:36:06.096553, relative time: 0:00:05.583539
+Generated 3790000 programs, absolute time: 0:36:11.688958, relative time: 0:00:05.592405
+Generated 3800000 programs, absolute time: 0:36:17.264843, relative time: 0:00:05.575885
+Generated 3810000 programs, absolute time: 0:36:22.865532, relative time: 0:00:05.600689
+Generated 3820000 programs, absolute time: 0:36:28.465926, relative time: 0:00:05.600394
+Generated 3830000 programs, absolute time: 0:36:34.067493, relative time: 0:00:05.601567
+Generated 3840000 programs, absolute time: 0:36:41.616911, relative time: 0:00:07.549418
+Generated 3850000 programs, absolute time: 0:36:47.207442, relative time: 0:00:05.590531
+Generated 3860000 programs, absolute time: 0:36:52.780644, relative time: 0:00:05.573202
+Generated 3870000 programs, absolute time: 0:36:58.379207, relative time: 0:00:05.598563
+Generated 3880000 programs, absolute time: 0:37:03.986654, relative time: 0:00:05.607447
+Generated 3880000 programs, absolute time: 0:37:03.986781, relative time: 0:00:00.000127
+Generated 3890000 programs, absolute time: 0:37:11.589838, relative time: 0:00:07.603057
+Generated 3900000 programs, absolute time: 0:37:17.172058, relative time: 0:00:05.582220
+Generated 3910000 programs, absolute time: 0:37:22.786021, relative time: 0:00:05.613963
+Generated 3920000 programs, absolute time: 0:37:28.382371, relative time: 0:00:05.596350
+Generated 3930000 programs, absolute time: 0:37:33.971209, relative time: 0:00:05.588838
+Generated 3940000 programs, absolute time: 0:37:39.502604, relative time: 0:00:05.531395
+Generated 3950000 programs, absolute time: 0:37:45.084127, relative time: 0:00:05.581523
+Generated 3960000 programs, absolute time: 0:37:50.737569, relative time: 0:00:05.653442
+Generated 3970000 programs, absolute time: 0:37:56.386438, relative time: 0:00:05.648869
+Generated 3980000 programs, absolute time: 0:38:02.024110, relative time: 0:00:05.637672
+Generated 3990000 programs, absolute time: 0:38:07.613315, relative time: 0:00:05.589205
+Generated 4000000 programs, absolute time: 0:38:13.204901, relative time: 0:00:05.591586
+Generated 4010000 programs, absolute time: 0:38:18.769386, relative time: 0:00:05.564485
+Generated 4020000 programs, absolute time: 0:38:26.339776, relative time: 0:00:07.570390
+Generated 4030000 programs, absolute time: 0:38:33.946914, relative time: 0:00:07.607138
+Generated 4040000 programs, absolute time: 0:38:39.519235, relative time: 0:00:05.572321
+Generated 4050000 programs, absolute time: 0:38:45.109485, relative time: 0:00:05.590250
+Generated 4060000 programs, absolute time: 0:38:50.683164, relative time: 0:00:05.573679
+Generated 4070000 programs, absolute time: 0:38:56.238384, relative time: 0:00:05.555220
+Generated 4080000 programs, absolute time: 0:39:01.802411, relative time: 0:00:05.564027
+Generated 4090000 programs, absolute time: 0:39:07.413020, relative time: 0:00:05.610609
+Generated 4100000 programs, absolute time: 0:39:12.961329, relative time: 0:00:05.548309
+Generated 4110000 programs, absolute time: 0:39:18.547626, relative time: 0:00:05.586297
+Generated 4120000 programs, absolute time: 0:39:24.098257, relative time: 0:00:05.550631
+Generated 4130000 programs, absolute time: 0:39:29.689859, relative time: 0:00:05.591602
+Generated 4130000 programs, absolute time: 0:39:29.690006, relative time: 0:00:00.000147
+Generated 4130000 programs, absolute time: 0:39:29.690738, relative time: 0:00:00.000732
+Generated 4140000 programs, absolute time: 0:39:35.277851, relative time: 0:00:05.587113
+Generated 4150000 programs, absolute time: 0:39:41.708266, relative time: 0:00:06.430415
+Generated 4160000 programs, absolute time: 0:39:47.258068, relative time: 0:00:05.549802
+Generated 4170000 programs, absolute time: 0:39:52.834475, relative time: 0:00:05.576407
+Generated 4180000 programs, absolute time: 0:39:58.447392, relative time: 0:00:05.612917
+Generated 4190000 programs, absolute time: 0:40:04.388042, relative time: 0:00:05.940650
+Generated 4200000 programs, absolute time: 0:40:11.959384, relative time: 0:00:07.571342
+Generated 4210000 programs, absolute time: 0:40:17.581225, relative time: 0:00:05.621841
+Generated 4220000 programs, absolute time: 0:40:23.153624, relative time: 0:00:05.572399
+Generated 4230000 programs, absolute time: 0:40:28.701266, relative time: 0:00:05.547642
+Generated 4240000 programs, absolute time: 0:40:34.336783, relative time: 0:00:05.635517
+Generated 4250000 programs, absolute time: 0:40:39.912938, relative time: 0:00:05.576155
+Generated 4260000 programs, absolute time: 0:40:45.464506, relative time: 0:00:05.551568
+Generated 4270000 programs, absolute time: 0:40:51.054667, relative time: 0:00:05.590161
+Generated 4280000 programs, absolute time: 0:40:56.645490, relative time: 0:00:05.590823
+Generated 4290000 programs, absolute time: 0:41:02.222736, relative time: 0:00:05.577246
+Generated 4300000 programs, absolute time: 0:41:07.799153, relative time: 0:00:05.576417
+Generated 4310000 programs, absolute time: 0:41:13.400403, relative time: 0:00:05.601250
+Generated 4320000 programs, absolute time: 0:41:18.967242, relative time: 0:00:05.566839
+Generated 4330000 programs, absolute time: 0:41:24.551081, relative time: 0:00:05.583839
+Generated 4340000 programs, absolute time: 0:41:30.124194, relative time: 0:00:05.573113
+Generated 4350000 programs, absolute time: 0:41:35.766500, relative time: 0:00:05.642306
+Generated 4360000 programs, absolute time: 0:41:41.354185, relative time: 0:00:05.587685
+Generated 4370000 programs, absolute time: 0:41:46.932568, relative time: 0:00:05.578383
+Generated 4380000 programs, absolute time: 0:41:52.492317, relative time: 0:00:05.559749
+Generated 4390000 programs, absolute time: 0:41:58.076497, relative time: 0:00:05.584180
+Generated 4400000 programs, absolute time: 0:42:03.718138, relative time: 0:00:05.641641
+Generated 4410000 programs, absolute time: 0:42:09.410238, relative time: 0:00:05.692100
+Generated 4420000 programs, absolute time: 0:42:14.992894, relative time: 0:00:05.582656
+Generated 4430000 programs, absolute time: 0:42:20.533897, relative time: 0:00:05.541003
+Generated 4430000 programs, absolute time: 0:42:20.534036, relative time: 0:00:00.000139
+Generated 4440000 programs, absolute time: 0:42:26.186030, relative time: 0:00:05.651994
+Generated 4450000 programs, absolute time: 0:42:33.889975, relative time: 0:00:07.703945
+Generated 4460000 programs, absolute time: 0:42:39.509177, relative time: 0:00:05.619202
+Generated 4470000 programs, absolute time: 0:42:45.110746, relative time: 0:00:05.601569
+Generated 4480000 programs, absolute time: 0:42:50.701320, relative time: 0:00:05.590574
+Generated 4490000 programs, absolute time: 0:42:58.268152, relative time: 0:00:07.566832
+Generated 4500000 programs, absolute time: 0:43:03.857820, relative time: 0:00:05.589668
+Generated 4510000 programs, absolute time: 0:43:09.395437, relative time: 0:00:05.537617
+Generated 4520000 programs, absolute time: 0:43:14.979447, relative time: 0:00:05.584010
+Generated 4530000 programs, absolute time: 0:43:20.559291, relative time: 0:00:05.579844
+Generated 4540000 programs, absolute time: 0:43:26.136152, relative time: 0:00:05.576861
+Generated 4540000 programs, absolute time: 0:43:26.137107, relative time: 0:00:00.000955
+Generated 4550000 programs, absolute time: 0:43:31.708699, relative time: 0:00:05.571592
+Generated 4560000 programs, absolute time: 0:43:37.314699, relative time: 0:00:05.606000
+Generated 4570000 programs, absolute time: 0:43:42.854017, relative time: 0:00:05.539318
+Generated 4570000 programs, absolute time: 0:43:42.854775, relative time: 0:00:00.000758
+Generated 4580000 programs, absolute time: 0:43:50.433657, relative time: 0:00:07.578882
+Generated 4590000 programs, absolute time: 0:43:56.043921, relative time: 0:00:05.610264
+Generated 4600000 programs, absolute time: 0:44:01.613564, relative time: 0:00:05.569643
+Generated 4610000 programs, absolute time: 0:44:07.197735, relative time: 0:00:05.584171
+Generated 4620000 programs, absolute time: 0:44:12.714701, relative time: 0:00:05.516966
+Generated 4630000 programs, absolute time: 0:44:18.314109, relative time: 0:00:05.599408
+Generated 4640000 programs, absolute time: 0:44:23.881662, relative time: 0:00:05.567553
+Generated 4650000 programs, absolute time: 0:44:29.432696, relative time: 0:00:05.551034
+Generated 4650000 programs, absolute time: 0:44:29.433390, relative time: 0:00:00.000694
+Generated 4660000 programs, absolute time: 0:44:35.030465, relative time: 0:00:05.597075
+Generated 4670000 programs, absolute time: 0:44:40.647844, relative time: 0:00:05.617379
+Generated 4680000 programs, absolute time: 0:44:46.195148, relative time: 0:00:05.547304
+Generated 4690000 programs, absolute time: 0:44:51.808189, relative time: 0:00:05.613041
+Generated 4700000 programs, absolute time: 0:44:57.324128, relative time: 0:00:05.515939
+Generated 4710000 programs, absolute time: 0:45:02.932292, relative time: 0:00:05.608164
+Generated 4720000 programs, absolute time: 0:45:08.487811, relative time: 0:00:05.555519
+Generated 4730000 programs, absolute time: 0:45:14.054212, relative time: 0:00:05.566401
+Generated 4740000 programs, absolute time: 0:45:19.625959, relative time: 0:00:05.571747
+Generated 4750000 programs, absolute time: 0:45:25.184061, relative time: 0:00:05.558102
+Generated 4760000 programs, absolute time: 0:45:30.784784, relative time: 0:00:05.600723
+Generated 4770000 programs, absolute time: 0:45:36.495573, relative time: 0:00:05.710789
+Generated 4780000 programs, absolute time: 0:45:44.025063, relative time: 0:00:07.529490
+Generated 4790000 programs, absolute time: 0:45:49.623050, relative time: 0:00:05.597987
+Generated 4800000 programs, absolute time: 0:45:55.203414, relative time: 0:00:05.580364
+Generated 4810000 programs, absolute time: 0:46:00.852639, relative time: 0:00:05.649225
+Generated 4820000 programs, absolute time: 0:46:06.487847, relative time: 0:00:05.635208
+Generated 4830000 programs, absolute time: 0:46:12.074550, relative time: 0:00:05.586703
+Generated 4840000 programs, absolute time: 0:46:17.667919, relative time: 0:00:05.593369
+Generated 4850000 programs, absolute time: 0:46:23.211995, relative time: 0:00:05.544076
+Generated 4860000 programs, absolute time: 0:46:28.788545, relative time: 0:00:05.576550
+Generated 4870000 programs, absolute time: 0:46:34.387752, relative time: 0:00:05.599207
+Generated 4880000 programs, absolute time: 0:46:39.931161, relative time: 0:00:05.543409
+Generated 4890000 programs, absolute time: 0:46:45.537333, relative time: 0:00:05.606172
+Generated 4900000 programs, absolute time: 0:46:51.095394, relative time: 0:00:05.558061
+Generated 4910000 programs, absolute time: 0:46:56.939942, relative time: 0:00:05.844548
+Generated 4920000 programs, absolute time: 0:47:02.481501, relative time: 0:00:05.541559
+Generated 4930000 programs, absolute time: 0:47:08.046108, relative time: 0:00:05.564607
+Generated 4930000 programs, absolute time: 0:47:08.046915, relative time: 0:00:00.000807
+Generated 4940000 programs, absolute time: 0:47:13.887681, relative time: 0:00:05.840766
+Generated 4950000 programs, absolute time: 0:47:19.486064, relative time: 0:00:05.598383
+Generated 4960000 programs, absolute time: 0:47:25.075545, relative time: 0:00:05.589481
+Generated 4970000 programs, absolute time: 0:47:30.650775, relative time: 0:00:05.575230
+Generated 4980000 programs, absolute time: 0:47:36.241894, relative time: 0:00:05.591119
+Generated 4990000 programs, absolute time: 0:47:41.853769, relative time: 0:00:05.611875
+Generated 5000000 programs, absolute time: 0:47:47.452786, relative time: 0:00:05.599017
+Generated 5010000 programs, absolute time: 0:47:57.027908, relative time: 0:00:09.575122
+Generated 5020000 programs, absolute time: 0:48:02.620734, relative time: 0:00:05.592826
+Generated 5030000 programs, absolute time: 0:48:08.984974, relative time: 0:00:06.364240
+Generated 5040000 programs, absolute time: 0:48:14.577221, relative time: 0:00:05.592247
+Generated 5050000 programs, absolute time: 0:48:20.155026, relative time: 0:00:05.577805
+Generated 5060000 programs, absolute time: 0:48:25.733109, relative time: 0:00:05.578083
+Generated 5070000 programs, absolute time: 0:48:31.304186, relative time: 0:00:05.571077
+Generated 5070000 programs, absolute time: 0:48:31.304943, relative time: 0:00:00.000757
+Generated 5080000 programs, absolute time: 0:48:36.993626, relative time: 0:00:05.688683
+Generated 5090000 programs, absolute time: 0:48:44.616783, relative time: 0:00:07.623157
+Generated 5100000 programs, absolute time: 0:48:50.203748, relative time: 0:00:05.586965
+Generated 5110000 programs, absolute time: 0:48:55.752045, relative time: 0:00:05.548297
+Generated 5110000 programs, absolute time: 0:48:55.752753, relative time: 0:00:00.000708
+Generated 5120000 programs, absolute time: 0:49:01.391066, relative time: 0:00:05.638313
+Generated 5130000 programs, absolute time: 0:49:07.010070, relative time: 0:00:05.619004
+Generated 5130000 programs, absolute time: 0:49:07.010895, relative time: 0:00:00.000825
+Generated 5140000 programs, absolute time: 0:49:12.608984, relative time: 0:00:05.598089
+Generated 5150000 programs, absolute time: 0:49:18.181608, relative time: 0:00:05.572624
+Generated 5160000 programs, absolute time: 0:49:23.746567, relative time: 0:00:05.564959
+Generated 5170000 programs, absolute time: 0:49:29.314908, relative time: 0:00:05.568341
+Generated 5180000 programs, absolute time: 0:49:34.915486, relative time: 0:00:05.600578
+Generated 5190000 programs, absolute time: 0:49:40.532458, relative time: 0:00:05.616972
+Generated 5200000 programs, absolute time: 0:49:46.122760, relative time: 0:00:05.590302
+Generated 5210000 programs, absolute time: 0:49:51.673725, relative time: 0:00:05.550965
+Generated 5220000 programs, absolute time: 0:49:57.266206, relative time: 0:00:05.592481
+Generated 5230000 programs, absolute time: 0:50:02.817488, relative time: 0:00:05.551282
+Generated 5240000 programs, absolute time: 0:50:08.396316, relative time: 0:00:05.578828
+Generated 5250000 programs, absolute time: 0:50:14.006389, relative time: 0:00:05.610073
+Generated 5260000 programs, absolute time: 0:50:21.559526, relative time: 0:00:07.553137
+Generated 5270000 programs, absolute time: 0:50:27.328823, relative time: 0:00:05.769297
+Generated 5280000 programs, absolute time: 0:50:33.004114, relative time: 0:00:05.675291
+Generated 5290000 programs, absolute time: 0:50:38.556776, relative time: 0:00:05.552662
+Generated 5300000 programs, absolute time: 0:50:44.100506, relative time: 0:00:05.543730
+Generated 5310000 programs, absolute time: 0:50:49.713151, relative time: 0:00:05.612645
+Generated 5320000 programs, absolute time: 0:50:55.336427, relative time: 0:00:05.623276
+Generated 5330000 programs, absolute time: 0:51:00.935288, relative time: 0:00:05.598861
+Generated 5340000 programs, absolute time: 0:51:06.510681, relative time: 0:00:05.575393
+Generated 5350000 programs, absolute time: 0:51:12.340196, relative time: 0:00:05.829515
+Generated 5360000 programs, absolute time: 0:51:17.874026, relative time: 0:00:05.533830
+Generated 5370000 programs, absolute time: 0:51:23.459103, relative time: 0:00:05.585077
+Generated 5380000 programs, absolute time: 0:51:29.056220, relative time: 0:00:05.597117
+Generated 5390000 programs, absolute time: 0:51:34.668248, relative time: 0:00:05.612028
+Generated 5400000 programs, absolute time: 0:51:40.290801, relative time: 0:00:05.622553
+Generated 5410000 programs, absolute time: 0:51:45.858199, relative time: 0:00:05.567398
+Generated 5420000 programs, absolute time: 0:51:51.450626, relative time: 0:00:05.592427
+Generated 5430000 programs, absolute time: 0:51:57.046612, relative time: 0:00:05.595986
+Generated 5440000 programs, absolute time: 0:52:02.778253, relative time: 0:00:05.731641
+Generated 5450000 programs, absolute time: 0:52:08.371698, relative time: 0:00:05.593445
+Generated 5450000 programs, absolute time: 0:52:08.371836, relative time: 0:00:00.000138
+Generated 5460000 programs, absolute time: 0:52:13.975019, relative time: 0:00:05.603183
+Generated 5470000 programs, absolute time: 0:52:19.518147, relative time: 0:00:05.543128
+Generated 5480000 programs, absolute time: 0:52:25.132230, relative time: 0:00:05.614083
+Generated 5490000 programs, absolute time: 0:52:30.749444, relative time: 0:00:05.617214
+Generated 5500000 programs, absolute time: 0:52:36.357342, relative time: 0:00:05.607898
+Generated 5510000 programs, absolute time: 0:52:41.891637, relative time: 0:00:05.534295
+Generated 5520000 programs, absolute time: 0:52:47.524787, relative time: 0:00:05.633150
+Generated 5530000 programs, absolute time: 0:52:53.112318, relative time: 0:00:05.587531
+Generated 5540000 programs, absolute time: 0:52:58.707392, relative time: 0:00:05.595074
+Generated 5550000 programs, absolute time: 0:53:04.283161, relative time: 0:00:05.575769
+Generated 5560000 programs, absolute time: 0:53:09.884443, relative time: 0:00:05.601282
+Generated 5570000 programs, absolute time: 0:53:15.464419, relative time: 0:00:05.579976
+Generated 5580000 programs, absolute time: 0:53:21.048387, relative time: 0:00:05.583968
+Generated 5590000 programs, absolute time: 0:53:26.618228, relative time: 0:00:05.569841
+Generated 5600000 programs, absolute time: 0:53:32.193626, relative time: 0:00:05.575398
+Generated 5610000 programs, absolute time: 0:53:37.825465, relative time: 0:00:05.631839
+Generated 5620000 programs, absolute time: 0:53:43.393505, relative time: 0:00:05.568040
+Generated 5630000 programs, absolute time: 0:53:49.007629, relative time: 0:00:05.614124
+Generated 5640000 programs, absolute time: 0:53:54.634768, relative time: 0:00:05.627139
+Generated 5650000 programs, absolute time: 0:54:00.259196, relative time: 0:00:05.624428
+Generated 5660000 programs, absolute time: 0:54:05.852187, relative time: 0:00:05.592991
+Generated 5670000 programs, absolute time: 0:54:13.427778, relative time: 0:00:07.575591
+Generated 5680000 programs, absolute time: 0:54:19.875372, relative time: 0:00:06.447594
+Generated 5690000 programs, absolute time: 0:54:27.468572, relative time: 0:00:07.593200
+Generated 5700000 programs, absolute time: 0:54:33.054040, relative time: 0:00:05.585468
+Generated 5710000 programs, absolute time: 0:54:38.629643, relative time: 0:00:05.575603
+Generated 5720000 programs, absolute time: 0:54:44.195420, relative time: 0:00:05.565777
+Generated 5730000 programs, absolute time: 0:54:49.774910, relative time: 0:00:05.579490
+Generated 5740000 programs, absolute time: 0:54:55.364019, relative time: 0:00:05.589109
+Generated 5750000 programs, absolute time: 0:55:00.893604, relative time: 0:00:05.529585
+Generated 5760000 programs, absolute time: 0:55:06.520689, relative time: 0:00:05.627085
+Generated 5770000 programs, absolute time: 0:55:12.135396, relative time: 0:00:05.614707
+Generated 5780000 programs, absolute time: 0:55:17.702689, relative time: 0:00:05.567293
+Generated 5790000 programs, absolute time: 0:55:23.312533, relative time: 0:00:05.609844
+Generated 5800000 programs, absolute time: 0:55:28.915494, relative time: 0:00:05.602961
+Generated 5810000 programs, absolute time: 0:55:34.531560, relative time: 0:00:05.616066
+Generated 5820000 programs, absolute time: 0:55:40.141903, relative time: 0:00:05.610343
+Generated 5830000 programs, absolute time: 0:55:45.758900, relative time: 0:00:05.616997
+Generated 5840000 programs, absolute time: 0:55:51.352109, relative time: 0:00:05.593209
+Generated 5850000 programs, absolute time: 0:55:56.979577, relative time: 0:00:05.627468
+Generated 5860000 programs, absolute time: 0:56:02.564013, relative time: 0:00:05.584436
+Generated 5870000 programs, absolute time: 0:56:08.134530, relative time: 0:00:05.570517
+Generated 5880000 programs, absolute time: 0:56:13.674705, relative time: 0:00:05.540175
+Generated 5890000 programs, absolute time: 0:56:19.277386, relative time: 0:00:05.602681
+Generated 5900000 programs, absolute time: 1:01:43.954490, relative time: 0:05:24.677104
+Generated 5910000 programs, absolute time: 1:01:49.523335, relative time: 0:00:05.568845
+Generated 5920000 programs, absolute time: 1:01:55.188959, relative time: 0:00:05.665624
+Generated 5930000 programs, absolute time: 1:02:01.893214, relative time: 0:00:06.704255
+Generated 5940000 programs, absolute time: 1:02:07.539392, relative time: 0:00:05.646178
+Generated 5950000 programs, absolute time: 1:02:13.199975, relative time: 0:00:05.660583
+Generated 5960000 programs, absolute time: 1:02:18.856241, relative time: 0:00:05.656266
+Generated 5970000 programs, absolute time: 1:02:24.554446, relative time: 0:00:05.698205
+Generated 5980000 programs, absolute time: 1:02:30.180082, relative time: 0:00:05.625636
+Generated 5990000 programs, absolute time: 1:02:35.787382, relative time: 0:00:05.607300
+Generated 6000000 programs, absolute time: 1:02:41.530498, relative time: 0:00:05.743116
+Generated 6010000 programs, absolute time: 1:02:47.190369, relative time: 0:00:05.659871
+Generated 6010000 programs, absolute time: 1:02:47.191294, relative time: 0:00:00.000925
+Generated 6020000 programs, absolute time: 1:02:52.785780, relative time: 0:00:05.594486
+Generated 6030000 programs, absolute time: 1:02:58.402374, relative time: 0:00:05.616594
+Generated 6040000 programs, absolute time: 1:03:04.038778, relative time: 0:00:05.636404
+Generated 6050000 programs, absolute time: 1:03:09.633305, relative time: 0:00:05.594527
+Generated 6060000 programs, absolute time: 1:03:15.284401, relative time: 0:00:05.651096
+Generated 6070000 programs, absolute time: 1:03:22.918537, relative time: 0:00:07.634136
+Generated 6080000 programs, absolute time: 1:03:28.539119, relative time: 0:00:05.620582
+Generated 6080000 programs, absolute time: 1:03:28.539948, relative time: 0:00:00.000829
+Generated 6090000 programs, absolute time: 1:03:34.177283, relative time: 0:00:05.637335
+Generated 6100000 programs, absolute time: 1:03:39.829856, relative time: 0:00:05.652573
+Generated 6110000 programs, absolute time: 1:03:45.484163, relative time: 0:00:05.654307
+Generated 6120000 programs, absolute time: 1:03:51.128913, relative time: 0:00:05.644750
+Generated 6130000 programs, absolute time: 1:03:56.753253, relative time: 0:00:05.624340
+Generated 6140000 programs, absolute time: 1:04:04.381101, relative time: 0:00:07.627848
+Generated 6150000 programs, absolute time: 1:04:10.062729, relative time: 0:00:05.681628
+Generated 6160000 programs, absolute time: 1:04:15.694341, relative time: 0:00:05.631612
+Generated 6170000 programs, absolute time: 1:04:21.334696, relative time: 0:00:05.640355
+Generated 6180000 programs, absolute time: 1:04:28.925952, relative time: 0:00:07.591256
+Generated 6190000 programs, absolute time: 1:04:34.617762, relative time: 0:00:05.691810
+Generated 6190000 programs, absolute time: 1:04:34.618371, relative time: 0:00:00.000609
+Generated 6200000 programs, absolute time: 1:04:42.276370, relative time: 0:00:07.657999
+Generated 6210000 programs, absolute time: 1:04:47.947041, relative time: 0:00:05.670671
+Generated 6210000 programs, absolute time: 1:04:47.947860, relative time: 0:00:00.000819
+Generated 6220000 programs, absolute time: 1:04:53.552084, relative time: 0:00:05.604224
+Generated 6230000 programs, absolute time: 1:04:59.131331, relative time: 0:00:05.579247
+Generated 6240000 programs, absolute time: 1:05:07.387270, relative time: 0:00:08.255939
+Generated 6250000 programs, absolute time: 1:05:13.004072, relative time: 0:00:05.616802
+Generated 6260000 programs, absolute time: 1:05:18.576938, relative time: 0:00:05.572866
+Generated 6270000 programs, absolute time: 1:05:24.158907, relative time: 0:00:05.581969
+Generated 6270000 programs, absolute time: 1:05:24.159426, relative time: 0:00:00.000519
+Generated 6280000 programs, absolute time: 1:05:29.710394, relative time: 0:00:05.550968
+Generated 6290000 programs, absolute time: 1:05:35.338180, relative time: 0:00:05.627786
+Generated 6300000 programs, absolute time: 1:05:40.965310, relative time: 0:00:05.627130
+Generated 6310000 programs, absolute time: 1:05:48.625938, relative time: 0:00:07.660628
+Generated 6320000 programs, absolute time: 1:05:54.254965, relative time: 0:00:05.629027
+Generated 6330000 programs, absolute time: 1:05:59.832152, relative time: 0:00:05.577187
+Generated 6340000 programs, absolute time: 1:06:05.482532, relative time: 0:00:05.650380
+Generated 6350000 programs, absolute time: 1:06:11.070253, relative time: 0:00:05.587721
+Generated 6360000 programs, absolute time: 1:06:16.671202, relative time: 0:00:05.600949
+Generated 6370000 programs, absolute time: 1:06:22.284058, relative time: 0:00:05.612856
+Generated 6380000 programs, absolute time: 1:06:27.847039, relative time: 0:00:05.562981
+Generated 6390000 programs, absolute time: 1:06:33.419727, relative time: 0:00:05.572688
+Generated 6400000 programs, absolute time: 1:06:39.012521, relative time: 0:00:05.592794
+Generated 6400000 programs, absolute time: 1:06:39.012674, relative time: 0:00:00.000153
+Generated 6410000 programs, absolute time: 1:06:44.594820, relative time: 0:00:05.582146
+Generated 6420000 programs, absolute time: 1:06:50.139777, relative time: 0:00:05.544957
+Generated 6430000 programs, absolute time: 1:06:55.720766, relative time: 0:00:05.580989
+Generated 6440000 programs, absolute time: 1:07:01.268452, relative time: 0:00:05.547686
+Generated 6450000 programs, absolute time: 1:07:08.869952, relative time: 0:00:07.601500
+Generated 6460000 programs, absolute time: 1:07:14.494601, relative time: 0:00:05.624649
+Generated 6470000 programs, absolute time: 1:07:20.097456, relative time: 0:00:05.602855
+Generated 6480000 programs, absolute time: 1:07:25.692978, relative time: 0:00:05.595522
+Generated 6490000 programs, absolute time: 1:07:31.337815, relative time: 0:00:05.644837
+Generated 6500000 programs, absolute time: 1:07:36.926890, relative time: 0:00:05.589075
+Generated 6510000 programs, absolute time: 1:07:42.502960, relative time: 0:00:05.576070
+Generated 6520000 programs, absolute time: 1:07:48.150011, relative time: 0:00:05.647051
+Generated 6530000 programs, absolute time: 1:07:53.725635, relative time: 0:00:05.575624
+Generated 6540000 programs, absolute time: 1:07:59.285312, relative time: 0:00:05.559677
+Generated 6550000 programs, absolute time: 1:08:04.871747, relative time: 0:00:05.586435
+Generated 6560000 programs, absolute time: 1:08:10.466015, relative time: 0:00:05.594268
+Generated 6570000 programs, absolute time: 1:08:16.083890, relative time: 0:00:05.617875
+Generated 6580000 programs, absolute time: 1:08:21.726134, relative time: 0:00:05.642244
+Generated 6590000 programs, absolute time: 1:08:27.290328, relative time: 0:00:05.564194
+Generated 6590000 programs, absolute time: 1:08:27.291091, relative time: 0:00:00.000763
+Generated 6600000 programs, absolute time: 1:08:32.878227, relative time: 0:00:05.587136
+Generated 6610000 programs, absolute time: 1:08:38.448122, relative time: 0:00:05.569895
+Generated 6620000 programs, absolute time: 1:08:44.072414, relative time: 0:00:05.624292
+Generated 6620000 programs, absolute time: 1:08:44.073117, relative time: 0:00:00.000703
+Generated 6630000 programs, absolute time: 1:08:49.679998, relative time: 0:00:05.606881
+Generated 6640000 programs, absolute time: 1:08:55.270286, relative time: 0:00:05.590288
+Generated 6650000 programs, absolute time: 1:09:00.827073, relative time: 0:00:05.556787
+Generated 6660000 programs, absolute time: 1:09:06.425070, relative time: 0:00:05.597997
+Generated 6670000 programs, absolute time: 1:09:12.001156, relative time: 0:00:05.576086
+Generated 6680000 programs, absolute time: 1:09:17.624460, relative time: 0:00:05.623304
+Generated 6690000 programs, absolute time: 1:09:23.200236, relative time: 0:00:05.575776
+Generated 6700000 programs, absolute time: 1:09:30.793161, relative time: 0:00:07.592925
+Generated 6710000 programs, absolute time: 1:09:36.373697, relative time: 0:00:05.580536
+Generated 6720000 programs, absolute time: 1:09:41.947189, relative time: 0:00:05.573492
+Generated 6730000 programs, absolute time: 1:09:47.560052, relative time: 0:00:05.612863
+Generated 6730000 programs, absolute time: 1:09:47.560184, relative time: 0:00:00.000132
+Generated 6740000 programs, absolute time: 1:09:53.266932, relative time: 0:00:05.706748
+Generated 6750000 programs, absolute time: 1:09:58.852443, relative time: 0:00:05.585511
+Generated 6750000 programs, absolute time: 1:09:58.852597, relative time: 0:00:00.000154
+Generated 6760000 programs, absolute time: 1:10:04.467962, relative time: 0:00:05.615365
+Generated 6770000 programs, absolute time: 1:10:10.049432, relative time: 0:00:05.581470
+Generated 6780000 programs, absolute time: 1:10:17.663249, relative time: 0:00:07.613817
+Generated 6790000 programs, absolute time: 1:10:23.253463, relative time: 0:00:05.590214
+Generated 6800000 programs, absolute time: 1:10:28.872970, relative time: 0:00:05.619507
+Generated 6810000 programs, absolute time: 1:10:34.485201, relative time: 0:00:05.612231
+Generated 6820000 programs, absolute time: 1:10:40.141681, relative time: 0:00:05.656480
+Generated 6830000 programs, absolute time: 1:10:45.838517, relative time: 0:00:05.696836
+Generated 6840000 programs, absolute time: 1:10:51.477707, relative time: 0:00:05.639190
+Generated 6850000 programs, absolute time: 1:10:57.155791, relative time: 0:00:05.678084
+Generated 6860000 programs, absolute time: 1:11:02.829980, relative time: 0:00:05.674189
+Generated 6870000 programs, absolute time: 1:11:08.547208, relative time: 0:00:05.717228
+Generated 6880000 programs, absolute time: 1:11:14.185014, relative time: 0:00:05.637806
+Generated 6890000 programs, absolute time: 1:11:19.867569, relative time: 0:00:05.682555
+Generated 6900000 programs, absolute time: 1:11:25.544793, relative time: 0:00:05.677224
+Generated 6910000 programs, absolute time: 1:11:31.174130, relative time: 0:00:05.629337
+Generated 6920000 programs, absolute time: 1:11:36.812222, relative time: 0:00:05.638092
+Generated 6920000 programs, absolute time: 1:11:36.812377, relative time: 0:00:00.000155
+Generated 6930000 programs, absolute time: 1:11:42.464341, relative time: 0:00:05.651964
+Generated 6940000 programs, absolute time: 1:11:48.134455, relative time: 0:00:05.670114
+Generated 6950000 programs, absolute time: 1:11:53.811932, relative time: 0:00:05.677477
+Generated 6960000 programs, absolute time: 1:11:59.470977, relative time: 0:00:05.659045
+Generated 6970000 programs, absolute time: 1:12:05.103837, relative time: 0:00:05.632860
+Generated 6980000 programs, absolute time: 1:12:10.763681, relative time: 0:00:05.659844
+Generated 6990000 programs, absolute time: 1:12:16.368337, relative time: 0:00:05.604656
+Generated 6990000 programs, absolute time: 1:12:16.369147, relative time: 0:00:00.000810
+Generated 7000000 programs, absolute time: 1:12:22.000028, relative time: 0:00:05.630881
+Generated 7010000 programs, absolute time: 1:12:27.707962, relative time: 0:00:05.707934
+Generated 7020000 programs, absolute time: 1:12:33.327523, relative time: 0:00:05.619561
+Generated 7030000 programs, absolute time: 1:12:38.966182, relative time: 0:00:05.638659
+Generated 7040000 programs, absolute time: 1:12:44.618789, relative time: 0:00:05.652607
+Generated 7050000 programs, absolute time: 1:12:50.203597, relative time: 0:00:05.584808
+Generated 7060000 programs, absolute time: 1:12:55.871548, relative time: 0:00:05.667951
+Generated 7060000 programs, absolute time: 1:12:55.872376, relative time: 0:00:00.000828
+Generated 7070000 programs, absolute time: 1:13:01.568634, relative time: 0:00:05.696258
+Generated 7080000 programs, absolute time: 1:13:07.245574, relative time: 0:00:05.676940
+Generated 7090000 programs, absolute time: 1:13:12.879697, relative time: 0:00:05.634123
+Generated 7100000 programs, absolute time: 1:13:18.529441, relative time: 0:00:05.649744
+Generated 7110000 programs, absolute time: 1:13:24.159400, relative time: 0:00:05.629959
+Generated 7120000 programs, absolute time: 1:13:29.784096, relative time: 0:00:05.624696
+Generated 7130000 programs, absolute time: 1:13:35.440675, relative time: 0:00:05.656579
+Generated 7140000 programs, absolute time: 1:13:41.113537, relative time: 0:00:05.672862
+Generated 7150000 programs, absolute time: 1:13:46.763612, relative time: 0:00:05.650075
+Generated 7160000 programs, absolute time: 1:13:52.370593, relative time: 0:00:05.606981
+Generated 7170000 programs, absolute time: 1:13:58.009636, relative time: 0:00:05.639043
+Generated 7180000 programs, absolute time: 1:14:03.691056, relative time: 0:00:05.681420
+Generated 7190000 programs, absolute time: 1:14:09.359378, relative time: 0:00:05.668322
+Generated 7200000 programs, absolute time: 1:14:15.020161, relative time: 0:00:05.660783
+Generated 7210000 programs, absolute time: 1:14:20.689597, relative time: 0:00:05.669436
+Generated 7220000 programs, absolute time: 1:14:26.408319, relative time: 0:00:05.718722
+Generated 7230000 programs, absolute time: 1:14:32.009133, relative time: 0:00:05.600814
+Generated 7240000 programs, absolute time: 1:14:37.652594, relative time: 0:00:05.643461
+Generated 7250000 programs, absolute time: 1:14:43.311054, relative time: 0:00:05.658460
+Generated 7260000 programs, absolute time: 1:14:48.943228, relative time: 0:00:05.632174
+Generated 7270000 programs, absolute time: 1:14:56.563795, relative time: 0:00:07.620567
+Generated 7280000 programs, absolute time: 1:15:02.236400, relative time: 0:00:05.672605
+Generated 7290000 programs, absolute time: 1:15:07.849549, relative time: 0:00:05.613149
+Generated 7300000 programs, absolute time: 1:15:13.513438, relative time: 0:00:05.663889
+Generated 7310000 programs, absolute time: 1:15:19.146387, relative time: 0:00:05.632949
+Generated 7320000 programs, absolute time: 1:15:24.774845, relative time: 0:00:05.628458
+Generated 7330000 programs, absolute time: 1:15:30.445860, relative time: 0:00:05.671015
+Generated 7340000 programs, absolute time: 1:15:36.108340, relative time: 0:00:05.662480
+Generated 7340000 programs, absolute time: 1:15:36.108528, relative time: 0:00:00.000188
+Generated 7350000 programs, absolute time: 1:15:41.752656, relative time: 0:00:05.644128
+Generated 7360000 programs, absolute time: 1:15:47.375557, relative time: 0:00:05.622901
+Generated 7370000 programs, absolute time: 1:15:53.011181, relative time: 0:00:05.635624
+Generated 7380000 programs, absolute time: 1:15:58.677806, relative time: 0:00:05.666625
+Generated 7390000 programs, absolute time: 1:16:04.338459, relative time: 0:00:05.660653
+Generated 7400000 programs, absolute time: 1:16:10.000732, relative time: 0:00:05.662273
+Generated 7410000 programs, absolute time: 1:16:15.608277, relative time: 0:00:05.607545
+Generated 7420000 programs, absolute time: 1:16:21.260095, relative time: 0:00:05.651818
+Generated 7430000 programs, absolute time: 1:16:26.949106, relative time: 0:00:05.689011
+Generated 7440000 programs, absolute time: 1:16:32.617293, relative time: 0:00:05.668187
+Generated 7450000 programs, absolute time: 1:16:38.259160, relative time: 0:00:05.641867
+Generated 7460000 programs, absolute time: 1:16:43.930649, relative time: 0:00:05.671489
+Generated 7470000 programs, absolute time: 1:16:49.792037, relative time: 0:00:05.861388
+Generated 7480000 programs, absolute time: 1:16:55.424361, relative time: 0:00:05.632324
+Generated 7490000 programs, absolute time: 1:17:01.040298, relative time: 0:00:05.615937
+Generated 7500000 programs, absolute time: 1:17:06.726274, relative time: 0:00:05.685976
+Generated 7510000 programs, absolute time: 1:17:12.357566, relative time: 0:00:05.631292
+Generated 7520000 programs, absolute time: 1:17:17.994437, relative time: 0:00:05.636871
+Generated 7530000 programs, absolute time: 1:17:23.604526, relative time: 0:00:05.610089
+Generated 7540000 programs, absolute time: 1:17:29.233348, relative time: 0:00:05.628822
+Generated 7550000 programs, absolute time: 1:17:34.851350, relative time: 0:00:05.618002
+Generated 7560000 programs, absolute time: 1:17:40.468888, relative time: 0:00:05.617538
+Generated 7570000 programs, absolute time: 1:17:46.054606, relative time: 0:00:05.585718
+Generated 7580000 programs, absolute time: 1:17:51.649591, relative time: 0:00:05.594985
+Generated 7590000 programs, absolute time: 1:17:57.285968, relative time: 0:00:05.636377
+Generated 7600000 programs, absolute time: 1:18:02.873345, relative time: 0:00:05.587377
+Generated 7610000 programs, absolute time: 1:18:08.479775, relative time: 0:00:05.606430
+Generated 7620000 programs, absolute time: 1:18:14.073862, relative time: 0:00:05.594087
+Generated 7630000 programs, absolute time: 1:18:19.655057, relative time: 0:00:05.581195
+Generated 7640000 programs, absolute time: 1:18:25.219413, relative time: 0:00:05.564356
+Generated 7650000 programs, absolute time: 1:18:30.761632, relative time: 0:00:05.542219
+Generated 7660000 programs, absolute time: 1:18:36.413326, relative time: 0:00:05.651694
+Generated 7670000 programs, absolute time: 1:18:42.020565, relative time: 0:00:05.607239
+Generated 7680000 programs, absolute time: 1:18:47.597340, relative time: 0:00:05.576775
+Generated 7690000 programs, absolute time: 1:18:53.203851, relative time: 0:00:05.606511
+Generated 7700000 programs, absolute time: 1:18:58.838141, relative time: 0:00:05.634290
+Generated 7710000 programs, absolute time: 1:19:04.405777, relative time: 0:00:05.567636
+Generated 7720000 programs, absolute time: 1:19:09.971006, relative time: 0:00:05.565229
+Generated 7730000 programs, absolute time: 1:19:15.589178, relative time: 0:00:05.618172
+Generated 7740000 programs, absolute time: 1:19:21.206389, relative time: 0:00:05.617211
+Generated 7750000 programs, absolute time: 1:19:26.761101, relative time: 0:00:05.554712
+Generated 7760000 programs, absolute time: 1:19:32.359973, relative time: 0:00:05.598872
+Generated 7770000 programs, absolute time: 1:19:37.902733, relative time: 0:00:05.542760
+Generated 7780000 programs, absolute time: 1:19:43.516761, relative time: 0:00:05.614028
+Generated 7790000 programs, absolute time: 1:19:51.130084, relative time: 0:00:07.613323
+Generated 7800000 programs, absolute time: 1:19:56.707999, relative time: 0:00:05.577915
+Generated 7810000 programs, absolute time: 1:20:02.305642, relative time: 0:00:05.597643
+Generated 7820000 programs, absolute time: 1:20:07.931539, relative time: 0:00:05.625897
+Generated 7830000 programs, absolute time: 1:20:13.547724, relative time: 0:00:05.616185
+Generated 7840000 programs, absolute time: 1:20:19.158741, relative time: 0:00:05.611017
+Generated 7840000 programs, absolute time: 1:20:19.159207, relative time: 0:00:00.000466
+Generated 7850000 programs, absolute time: 1:20:24.794310, relative time: 0:00:05.635103
+Generated 7860000 programs, absolute time: 1:20:30.405470, relative time: 0:00:05.611160
+Generated 7870000 programs, absolute time: 1:20:36.008134, relative time: 0:00:05.602664
+Generated 7880000 programs, absolute time: 1:20:41.621913, relative time: 0:00:05.613779
+Generated 7890000 programs, absolute time: 1:20:47.229585, relative time: 0:00:05.607672
+Generated 7900000 programs, absolute time: 1:20:52.872064, relative time: 0:00:05.642479
+Generated 7910000 programs, absolute time: 1:20:58.472240, relative time: 0:00:05.600176
+Generated 7920000 programs, absolute time: 1:21:04.074192, relative time: 0:00:05.601952
+Generated 7930000 programs, absolute time: 1:21:09.709307, relative time: 0:00:05.635115
+Generated 7940000 programs, absolute time: 1:21:16.574559, relative time: 0:00:06.865252
+Generated 7950000 programs, absolute time: 1:21:22.261190, relative time: 0:00:05.686631
+Generated 7960000 programs, absolute time: 1:21:27.924565, relative time: 0:00:05.663375
+Generated 7970000 programs, absolute time: 1:21:33.485214, relative time: 0:00:05.560649
+Generated 7980000 programs, absolute time: 1:21:39.051213, relative time: 0:00:05.565999
+Generated 7990000 programs, absolute time: 1:21:44.704010, relative time: 0:00:05.652797
+Generated 8000000 programs, absolute time: 1:21:50.254989, relative time: 0:00:05.550979
+Generated 8010000 programs, absolute time: 1:21:55.858795, relative time: 0:00:05.603806
+Generated 8020000 programs, absolute time: 1:22:01.461741, relative time: 0:00:05.602946
+Generated 8030000 programs, absolute time: 1:22:07.079663, relative time: 0:00:05.617922
+Generated 8040000 programs, absolute time: 1:22:14.670695, relative time: 0:00:07.591032
+Generated 8050000 programs, absolute time: 1:22:22.312567, relative time: 0:00:07.641872
+Generated 8060000 programs, absolute time: 1:22:27.899472, relative time: 0:00:05.586905
+Generated 8070000 programs, absolute time: 1:22:33.532460, relative time: 0:00:05.632988
+Generated 8080000 programs, absolute time: 1:22:39.209184, relative time: 0:00:05.676724
+Generated 8090000 programs, absolute time: 1:22:44.853664, relative time: 0:00:05.644480
+Generated 8100000 programs, absolute time: 1:22:52.509210, relative time: 0:00:07.655546
+Generated 8110000 programs, absolute time: 1:22:58.117673, relative time: 0:00:05.608463
+Generated 8120000 programs, absolute time: 1:23:03.770785, relative time: 0:00:05.653112
+Generated 8130000 programs, absolute time: 1:23:09.394704, relative time: 0:00:05.623919
+Generated 8140000 programs, absolute time: 1:23:15.040386, relative time: 0:00:05.645682
+Generated 8150000 programs, absolute time: 1:23:20.654486, relative time: 0:00:05.614100
+Generated 8160000 programs, absolute time: 1:23:28.360241, relative time: 0:00:07.705755
+Generated 8170000 programs, absolute time: 1:23:34.122248, relative time: 0:00:05.762007
+Generated 8180000 programs, absolute time: 1:23:39.718078, relative time: 0:00:05.595830
+Generated 8190000 programs, absolute time: 1:23:45.348126, relative time: 0:00:05.630048
+Generated 8200000 programs, absolute time: 1:23:51.018820, relative time: 0:00:05.670694
+Generated 8210000 programs, absolute time: 1:23:56.676016, relative time: 0:00:05.657196
+Generated 8220000 programs, absolute time: 1:24:02.323449, relative time: 0:00:05.647433
+Generated 8220000 programs, absolute time: 1:24:02.323618, relative time: 0:00:00.000169
+Generated 8220000 programs, absolute time: 1:24:02.324160, relative time: 0:00:00.000542
+Generated 8230000 programs, absolute time: 1:24:07.947042, relative time: 0:00:05.622882
+Generated 8240000 programs, absolute time: 1:24:13.597771, relative time: 0:00:05.650729
+Generated 8250000 programs, absolute time: 1:24:19.251729, relative time: 0:00:05.653958
+Generated 8260000 programs, absolute time: 1:24:24.919165, relative time: 0:00:05.667436
+Generated 8270000 programs, absolute time: 1:24:30.569903, relative time: 0:00:05.650738
+Generated 8280000 programs, absolute time: 1:24:36.233553, relative time: 0:00:05.663650
+Generated 8290000 programs, absolute time: 1:24:41.888983, relative time: 0:00:05.655430
+Generated 8300000 programs, absolute time: 1:24:47.499315, relative time: 0:00:05.610332
+Generated 8310000 programs, absolute time: 1:24:53.152219, relative time: 0:00:05.652904
+Generated 8320000 programs, absolute time: 1:24:58.801207, relative time: 0:00:05.648988
+Generated 8320000 programs, absolute time: 1:24:58.801365, relative time: 0:00:00.000158
+Generated 8330000 programs, absolute time: 1:25:04.477255, relative time: 0:00:05.675890
+Generated 8340000 programs, absolute time: 1:25:11.242422, relative time: 0:00:06.765167
+Generated 8350000 programs, absolute time: 1:25:16.924157, relative time: 0:00:05.681735
+Generated 8360000 programs, absolute time: 1:25:22.573479, relative time: 0:00:05.649322
+Generated 8360000 programs, absolute time: 1:25:22.573641, relative time: 0:00:00.000162
+Generated 8370000 programs, absolute time: 1:25:28.234530, relative time: 0:00:05.660889
+Generated 8380000 programs, absolute time: 1:25:33.860482, relative time: 0:00:05.625952
+Generated 8390000 programs, absolute time: 1:25:39.554427, relative time: 0:00:05.693945
+Generated 8390000 programs, absolute time: 1:25:39.554589, relative time: 0:00:00.000162
+Generated 8400000 programs, absolute time: 1:25:45.212130, relative time: 0:00:05.657541
+Generated 8410000 programs, absolute time: 1:25:52.824763, relative time: 0:00:07.612633
+Generated 8420000 programs, absolute time: 1:25:58.457205, relative time: 0:00:05.632442
+Generated 8420000 programs, absolute time: 1:25:58.458046, relative time: 0:00:00.000841
+Generated 8430000 programs, absolute time: 1:26:04.145973, relative time: 0:00:05.687927
+Generated 8440000 programs, absolute time: 1:26:09.790249, relative time: 0:00:05.644276
+Generated 8450000 programs, absolute time: 1:26:15.404852, relative time: 0:00:05.614603
+Generated 8460000 programs, absolute time: 1:26:21.048165, relative time: 0:00:05.643313
+Generated 8460000 programs, absolute time: 1:26:21.048914, relative time: 0:00:00.000749
+Generated 8470000 programs, absolute time: 1:26:26.662373, relative time: 0:00:05.613459
+Generated 8480000 programs, absolute time: 1:26:32.301470, relative time: 0:00:05.639097
+Generated 8490000 programs, absolute time: 1:26:38.082901, relative time: 0:00:05.781431
+Generated 8500000 programs, absolute time: 1:26:45.667197, relative time: 0:00:07.584296
+Generated 8510000 programs, absolute time: 1:26:51.199008, relative time: 0:00:05.531811
+Generated 8520000 programs, absolute time: 1:26:56.868119, relative time: 0:00:05.669111
+Generated 8530000 programs, absolute time: 1:27:02.474486, relative time: 0:00:05.606367
+Generated 8540000 programs, absolute time: 1:27:10.048503, relative time: 0:00:07.574017
+Generated 8550000 programs, absolute time: 1:27:15.645583, relative time: 0:00:05.597080
+Generated 8560000 programs, absolute time: 1:27:21.301709, relative time: 0:00:05.656126
+Generated 8570000 programs, absolute time: 1:27:26.887211, relative time: 0:00:05.585502
+Generated 8580000 programs, absolute time: 1:27:32.529831, relative time: 0:00:05.642620
+Generated 8590000 programs, absolute time: 1:27:38.170095, relative time: 0:00:05.640264
+Generated 8600000 programs, absolute time: 1:27:43.813106, relative time: 0:00:05.643011
+Generated 8610000 programs, absolute time: 1:27:49.447647, relative time: 0:00:05.634541
+Generated 8620000 programs, absolute time: 1:27:55.070388, relative time: 0:00:05.622741
+Generated 8630000 programs, absolute time: 1:28:00.702749, relative time: 0:00:05.632361
+Generated 8640000 programs, absolute time: 1:28:06.287604, relative time: 0:00:05.584855
+Generated 8650000 programs, absolute time: 1:28:11.926718, relative time: 0:00:05.639114
+Generated 8660000 programs, absolute time: 1:28:17.755778, relative time: 0:00:05.829060
+Generated 8660000 programs, absolute time: 1:28:17.756546, relative time: 0:00:00.000768
+Generated 8670000 programs, absolute time: 1:28:23.444010, relative time: 0:00:05.687464
+Generated 8680000 programs, absolute time: 1:28:29.121068, relative time: 0:00:05.677058
+Generated 8690000 programs, absolute time: 1:28:34.698372, relative time: 0:00:05.577304
+Generated 8700000 programs, absolute time: 1:28:41.513232, relative time: 0:00:06.814860
+Generated 8710000 programs, absolute time: 1:28:47.136555, relative time: 0:00:05.623323
+Generated 8720000 programs, absolute time: 1:28:52.745936, relative time: 0:00:05.609381
+Generated 8720000 programs, absolute time: 1:28:52.746949, relative time: 0:00:00.001013
+Generated 8730000 programs, absolute time: 1:28:58.381268, relative time: 0:00:05.634319
+Generated 8740000 programs, absolute time: 1:29:04.025497, relative time: 0:00:05.644229
+Generated 8750000 programs, absolute time: 1:29:09.580872, relative time: 0:00:05.555375
+Generated 8760000 programs, absolute time: 1:29:17.234246, relative time: 0:00:07.653374
+Generated 8770000 programs, absolute time: 1:29:22.913093, relative time: 0:00:05.678847
+Generated 8780000 programs, absolute time: 1:29:28.577841, relative time: 0:00:05.664748
+Generated 8790000 programs, absolute time: 1:29:34.190124, relative time: 0:00:05.612283
+Generated 8800000 programs, absolute time: 1:29:39.855186, relative time: 0:00:05.665062
+Generated 8810000 programs, absolute time: 1:29:45.462739, relative time: 0:00:05.607553
+Generated 8820000 programs, absolute time: 1:29:51.098612, relative time: 0:00:05.635873
+Generated 8830000 programs, absolute time: 1:29:56.773442, relative time: 0:00:05.674830
+Generated 8840000 programs, absolute time: 1:30:02.476259, relative time: 0:00:05.702817
+Generated 8850000 programs, absolute time: 1:30:08.106816, relative time: 0:00:05.630557
+Generated 8860000 programs, absolute time: 1:30:13.730168, relative time: 0:00:05.623352
+Generated 8870000 programs, absolute time: 1:30:19.466366, relative time: 0:00:05.736198
+Generated 8880000 programs, absolute time: 1:30:25.052556, relative time: 0:00:05.586190
+Generated 8890000 programs, absolute time: 1:30:30.778737, relative time: 0:00:05.726181
+Generated 8900000 programs, absolute time: 1:30:36.403910, relative time: 0:00:05.625173
+Generated 8910000 programs, absolute time: 1:30:42.061156, relative time: 0:00:05.657246
+Generated 8910000 programs, absolute time: 1:30:42.061275, relative time: 0:00:00.000119
+Generated 8920000 programs, absolute time: 1:30:47.670160, relative time: 0:00:05.608885
+Generated 8930000 programs, absolute time: 1:30:53.266743, relative time: 0:00:05.596583
+Generated 8940000 programs, absolute time: 1:30:58.942288, relative time: 0:00:05.675545
+Generated 8950000 programs, absolute time: 1:31:04.622197, relative time: 0:00:05.679909
+Generated 8960000 programs, absolute time: 1:31:10.315827, relative time: 0:00:05.693630
+Generated 8970000 programs, absolute time: 1:31:15.938089, relative time: 0:00:05.622262
+Generated 8980000 programs, absolute time: 1:31:23.582996, relative time: 0:00:07.644907
+Generated 8990000 programs, absolute time: 1:31:29.236041, relative time: 0:00:05.653045
+Generated 9000000 programs, absolute time: 1:31:34.860384, relative time: 0:00:05.624343
+Generated 9010000 programs, absolute time: 1:31:40.452073, relative time: 0:00:05.591689
+Generated 9020000 programs, absolute time: 1:31:46.088232, relative time: 0:00:05.636159
+Generated 9030000 programs, absolute time: 1:31:51.696305, relative time: 0:00:05.608073
+Generated 9040000 programs, absolute time: 1:31:59.314380, relative time: 0:00:07.618075
+Generated 9050000 programs, absolute time: 1:32:04.934680, relative time: 0:00:05.620300
+Generated 9060000 programs, absolute time: 1:32:10.501490, relative time: 0:00:05.566810
+Generated 9070000 programs, absolute time: 1:32:18.137585, relative time: 0:00:07.636095
+Generated 9080000 programs, absolute time: 1:32:23.789594, relative time: 0:00:05.652009
+Generated 9090000 programs, absolute time: 1:32:29.412412, relative time: 0:00:05.622818
+Generated 9100000 programs, absolute time: 1:32:35.019620, relative time: 0:00:05.607208
+Generated 9110000 programs, absolute time: 1:32:40.579274, relative time: 0:00:05.559654
+Generated 9120000 programs, absolute time: 1:32:46.186426, relative time: 0:00:05.607152
+Generated 9130000 programs, absolute time: 1:32:51.823126, relative time: 0:00:05.636700
+Generated 9130000 programs, absolute time: 1:32:51.823787, relative time: 0:00:00.000661
+Generated 9140000 programs, absolute time: 1:32:57.453059, relative time: 0:00:05.629272
+Generated 9150000 programs, absolute time: 1:33:03.054555, relative time: 0:00:05.601496
+Generated 9160000 programs, absolute time: 1:33:08.707621, relative time: 0:00:05.653066
+Generated 9170000 programs, absolute time: 1:33:14.297321, relative time: 0:00:05.589700
+Generated 9180000 programs, absolute time: 1:33:21.928357, relative time: 0:00:07.631036
+Generated 9190000 programs, absolute time: 1:33:27.504948, relative time: 0:00:05.576591
+Generated 9200000 programs, absolute time: 1:33:33.166852, relative time: 0:00:05.661904
+Generated 9210000 programs, absolute time: 1:33:38.776541, relative time: 0:00:05.609689
+Generated 9220000 programs, absolute time: 1:33:44.393368, relative time: 0:00:05.616827
+Generated 9230000 programs, absolute time: 1:33:50.048267, relative time: 0:00:05.654899
+Generated 9240000 programs, absolute time: 1:33:55.685344, relative time: 0:00:05.637077
+Generated 9250000 programs, absolute time: 1:34:01.317243, relative time: 0:00:05.631899
+Generated 9250000 programs, absolute time: 1:34:01.317777, relative time: 0:00:00.000534
+Generated 9260000 programs, absolute time: 1:34:06.952485, relative time: 0:00:05.634708
+Generated 9270000 programs, absolute time: 1:34:12.549466, relative time: 0:00:05.596981
+Generated 9280000 programs, absolute time: 1:34:18.160112, relative time: 0:00:05.610646
+Generated 9280000 programs, absolute time: 1:34:18.160717, relative time: 0:00:00.000605
+Generated 9290000 programs, absolute time: 1:34:25.793717, relative time: 0:00:07.633000
+Generated 9300000 programs, absolute time: 1:34:31.456559, relative time: 0:00:05.662842
+Generated 9310000 programs, absolute time: 1:34:37.070383, relative time: 0:00:05.613824
+Generated 9320000 programs, absolute time: 1:34:42.732411, relative time: 0:00:05.662028
+Generated 9330000 programs, absolute time: 1:34:48.331826, relative time: 0:00:05.599415
+Generated 9340000 programs, absolute time: 1:34:53.997655, relative time: 0:00:05.665829
+Generated 9350000 programs, absolute time: 1:34:59.612934, relative time: 0:00:05.615279
+Generated 9360000 programs, absolute time: 1:35:05.225489, relative time: 0:00:05.612555
+Generated 9360000 programs, absolute time: 1:35:05.226178, relative time: 0:00:00.000689
+Generated 9370000 programs, absolute time: 1:35:10.869177, relative time: 0:00:05.642999
+Generated 9380000 programs, absolute time: 1:35:16.495578, relative time: 0:00:05.626401
+Generated 9390000 programs, absolute time: 1:35:22.072480, relative time: 0:00:05.576902
+Generated 9400000 programs, absolute time: 1:35:27.713390, relative time: 0:00:05.640910
+Generated 9410000 programs, absolute time: 1:35:35.359974, relative time: 0:00:07.646584
+Generated 9420000 programs, absolute time: 1:35:40.948055, relative time: 0:00:05.588081
+Generated 9430000 programs, absolute time: 1:35:46.545996, relative time: 0:00:05.597941
+Generated 9440000 programs, absolute time: 1:35:54.201235, relative time: 0:00:07.655239
+Generated 9450000 programs, absolute time: 1:36:01.862508, relative time: 0:00:07.661273
+Generated 9460000 programs, absolute time: 1:36:07.495784, relative time: 0:00:05.633276
+Generated 9460000 programs, absolute time: 1:36:07.496511, relative time: 0:00:00.000727
+Generated 9460000 programs, absolute time: 1:36:07.496609, relative time: 0:00:00.000098
+Generated 9470000 programs, absolute time: 1:36:13.147070, relative time: 0:00:05.650461
+Generated 9480000 programs, absolute time: 1:36:18.755816, relative time: 0:00:05.608746
+Generated 9490000 programs, absolute time: 1:36:24.365752, relative time: 0:00:05.609936
+Generated 9500000 programs, absolute time: 1:36:30.028585, relative time: 0:00:05.662833
+Generated 9510000 programs, absolute time: 1:36:35.647194, relative time: 0:00:05.618609
+Generated 9510000 programs, absolute time: 1:36:35.647842, relative time: 0:00:00.000648
+Generated 9520000 programs, absolute time: 1:36:41.323419, relative time: 0:00:05.675577
+Generated 9530000 programs, absolute time: 1:36:47.289561, relative time: 0:00:05.966142
+Generated 9540000 programs, absolute time: 1:36:52.922999, relative time: 0:00:05.633438
+Generated 9550000 programs, absolute time: 1:36:58.509953, relative time: 0:00:05.586954
+Generated 9560000 programs, absolute time: 1:37:04.180315, relative time: 0:00:05.670362
+Generated 9570000 programs, absolute time: 1:37:09.799964, relative time: 0:00:05.619649
+Generated 9580000 programs, absolute time: 1:37:15.432629, relative time: 0:00:05.632665
+Generated 9580000 programs, absolute time: 1:37:15.433378, relative time: 0:00:00.000749
+Generated 9590000 programs, absolute time: 1:37:21.056917, relative time: 0:00:05.623539
+Generated 9600000 programs, absolute time: 1:37:26.709450, relative time: 0:00:05.652533
+Generated 9610000 programs, absolute time: 1:37:32.342525, relative time: 0:00:05.633075
+Generated 9620000 programs, absolute time: 1:37:37.939057, relative time: 0:00:05.596532
+Generated 9630000 programs, absolute time: 1:37:43.602196, relative time: 0:00:05.663139
+Generated 9630000 programs, absolute time: 1:37:43.602945, relative time: 0:00:00.000749
+Generated 9640000 programs, absolute time: 1:37:49.257285, relative time: 0:00:05.654340
+Generated 9650000 programs, absolute time: 1:37:54.877910, relative time: 0:00:05.620625
+Generated 9650000 programs, absolute time: 1:37:54.878662, relative time: 0:00:00.000752
+Generated 9660000 programs, absolute time: 1:38:00.494484, relative time: 0:00:05.615822
+Generated 9670000 programs, absolute time: 1:38:06.139435, relative time: 0:00:05.644951
+Generated 9680000 programs, absolute time: 1:38:11.781578, relative time: 0:00:05.642143
+Generated 9690000 programs, absolute time: 1:38:17.413329, relative time: 0:00:05.631751
+Generated 9700000 programs, absolute time: 1:38:23.056883, relative time: 0:00:05.643554
+Generated 9710000 programs, absolute time: 1:38:28.690919, relative time: 0:00:05.634036
+Generated 9720000 programs, absolute time: 1:38:34.280301, relative time: 0:00:05.589382
+Generated 9730000 programs, absolute time: 1:38:39.871392, relative time: 0:00:05.591091
+Generated 9740000 programs, absolute time: 1:38:47.495240, relative time: 0:00:07.623848
+Generated 9750000 programs, absolute time: 1:38:53.133586, relative time: 0:00:05.638346
+Generated 9760000 programs, absolute time: 1:39:00.730960, relative time: 0:00:07.597374
+Generated 9770000 programs, absolute time: 1:39:06.766724, relative time: 0:00:06.035764
+Generated 9780000 programs, absolute time: 1:39:12.396346, relative time: 0:00:05.629622
+Generated 9790000 programs, absolute time: 1:39:18.027233, relative time: 0:00:05.630887
+Generated 9800000 programs, absolute time: 1:39:23.630394, relative time: 0:00:05.603161
+Generated 9810000 programs, absolute time: 1:39:29.203003, relative time: 0:00:05.572609
+Generated 9820000 programs, absolute time: 1:39:34.841875, relative time: 0:00:05.638872
+Generated 9830000 programs, absolute time: 1:39:40.443391, relative time: 0:00:05.601516
+Generated 9840000 programs, absolute time: 1:39:46.048566, relative time: 0:00:05.605175
+Generated 9850000 programs, absolute time: 1:39:51.641183, relative time: 0:00:05.592617
+Generated 9860000 programs, absolute time: 1:39:57.254651, relative time: 0:00:05.613468
+Generated 9870000 programs, absolute time: 1:40:02.818756, relative time: 0:00:05.564105
+Generated 9880000 programs, absolute time: 1:40:08.390581, relative time: 0:00:05.571825
+Generated 9890000 programs, absolute time: 1:40:14.023973, relative time: 0:00:05.633392
+Generated 9900000 programs, absolute time: 1:40:19.647559, relative time: 0:00:05.623586
+Generated 9910000 programs, absolute time: 1:40:25.264903, relative time: 0:00:05.617344
+Generated 9920000 programs, absolute time: 1:40:31.019230, relative time: 0:00:05.754327
+Generated 9930000 programs, absolute time: 1:40:36.632744, relative time: 0:00:05.613514
+Generated 9940000 programs, absolute time: 1:40:42.259442, relative time: 0:00:05.626698
+Generated 9950000 programs, absolute time: 1:40:47.881014, relative time: 0:00:05.621572
+Generated 9960000 programs, absolute time: 1:40:53.457527, relative time: 0:00:05.576513
+Generated 9970000 programs, absolute time: 1:41:01.075274, relative time: 0:00:07.617747
+Generated 9980000 programs, absolute time: 1:41:06.752725, relative time: 0:00:05.677451
+Generated 9990000 programs, absolute time: 1:41:12.406999, relative time: 0:00:05.654274
+Generated 10000000 programs, absolute time: 1:41:18.087098, relative time: 0:00:05.680099
+Generated 10000000 programs, absolute time: 1:41:18.088200, relative time: 0:00:00.001102
+Generated 10010000 programs, absolute time: 1:41:25.761835, relative time: 0:00:07.673635
+Generated 10020000 programs, absolute time: 1:41:31.421229, relative time: 0:00:05.659394
+Generated 10020000 programs, absolute time: 1:41:31.421865, relative time: 0:00:00.000636
+Generated 10030000 programs, absolute time: 1:41:37.006329, relative time: 0:00:05.584464
+Generated 10040000 programs, absolute time: 1:41:42.640260, relative time: 0:00:05.633931
+Generated 10050000 programs, absolute time: 1:41:48.220154, relative time: 0:00:05.579894
+Generated 10060000 programs, absolute time: 1:41:53.823242, relative time: 0:00:05.603088
+Generated 10070000 programs, absolute time: 1:41:59.423468, relative time: 0:00:05.600226
+Generated 10080000 programs, absolute time: 1:42:05.018730, relative time: 0:00:05.595262
+Generated 10090000 programs, absolute time: 1:42:12.666398, relative time: 0:00:07.647668
+Generated 10100000 programs, absolute time: 1:42:18.265738, relative time: 0:00:05.599340
+Generated 10110000 programs, absolute time: 1:42:23.876981, relative time: 0:00:05.611243
+Generated 10120000 programs, absolute time: 1:42:29.506537, relative time: 0:00:05.629556
+Generated 10130000 programs, absolute time: 1:42:35.170957, relative time: 0:00:05.664420
+Generated 10140000 programs, absolute time: 1:42:40.780033, relative time: 0:00:05.609076
+Generated 10150000 programs, absolute time: 1:42:46.440425, relative time: 0:00:05.660392
+Generated 10160000 programs, absolute time: 1:42:52.076484, relative time: 0:00:05.636059
+Generated 10170000 programs, absolute time: 1:42:57.722275, relative time: 0:00:05.645791
+Generated 10180000 programs, absolute time: 1:43:03.424219, relative time: 0:00:05.701944
+Generated 10190000 programs, absolute time: 1:43:09.014821, relative time: 0:00:05.590602
+Generated 10200000 programs, absolute time: 1:43:14.634637, relative time: 0:00:05.619816
+Generated 10210000 programs, absolute time: 1:43:20.268060, relative time: 0:00:05.633423
+Generated 10220000 programs, absolute time: 1:43:25.939573, relative time: 0:00:05.671513
+Generated 10230000 programs, absolute time: 1:43:31.527044, relative time: 0:00:05.587471
+Generated 10240000 programs, absolute time: 1:43:37.204307, relative time: 0:00:05.677263
+Generated 10240000 programs, absolute time: 1:43:37.204435, relative time: 0:00:00.000128
+Generated 10250000 programs, absolute time: 1:43:42.811213, relative time: 0:00:05.606778
+Generated 10260000 programs, absolute time: 1:43:48.425304, relative time: 0:00:05.614091
+Generated 10270000 programs, absolute time: 1:43:54.044776, relative time: 0:00:05.619472
+Generated 10280000 programs, absolute time: 1:43:59.716926, relative time: 0:00:05.672150
+Generated 10290000 programs, absolute time: 1:44:05.325602, relative time: 0:00:05.608676
+Generated 10300000 programs, absolute time: 1:44:10.971726, relative time: 0:00:05.646124
+Generated 10310000 programs, absolute time: 1:44:16.593143, relative time: 0:00:05.621417
+Generated 10320000 programs, absolute time: 1:44:22.196296, relative time: 0:00:05.603153
+Generated 10330000 programs, absolute time: 1:44:27.795164, relative time: 0:00:05.598868
+Generated 10340000 programs, absolute time: 1:44:33.352030, relative time: 0:00:05.556866
+Generated 10340000 programs, absolute time: 1:44:33.352492, relative time: 0:00:00.000462
+Generated 10350000 programs, absolute time: 1:44:38.991467, relative time: 0:00:05.638975
+Generated 10350000 programs, absolute time: 1:44:38.992353, relative time: 0:00:00.000886
+Generated 10360000 programs, absolute time: 1:44:44.580951, relative time: 0:00:05.588598
+Generated 10370000 programs, absolute time: 1:44:50.174244, relative time: 0:00:05.593293
+Generated 10380000 programs, absolute time: 1:44:55.787904, relative time: 0:00:05.613660
+Generated 10390000 programs, absolute time: 1:45:03.436540, relative time: 0:00:07.648636
+Generated 10400000 programs, absolute time: 1:45:09.066282, relative time: 0:00:05.629742
+Generated 10410000 programs, absolute time: 1:45:14.739571, relative time: 0:00:05.673289
+Generated 10420000 programs, absolute time: 1:45:20.371857, relative time: 0:00:05.632286
+Generated 10430000 programs, absolute time: 1:45:26.012224, relative time: 0:00:05.640367
+Generated 10440000 programs, absolute time: 1:45:33.674822, relative time: 0:00:07.662598
+Generated 10450000 programs, absolute time: 1:45:39.377972, relative time: 0:00:05.703150
+Generated 10460000 programs, absolute time: 1:45:44.974284, relative time: 0:00:05.596312
+Generated 10470000 programs, absolute time: 1:45:50.682386, relative time: 0:00:05.708102
+Generated 10480000 programs, absolute time: 1:45:56.474267, relative time: 0:00:05.791881
+Generated 10490000 programs, absolute time: 1:46:02.625418, relative time: 0:00:06.151151
+Generated 10500000 programs, absolute time: 1:46:08.272263, relative time: 0:00:05.646845
+Generated 10500000 programs, absolute time: 1:46:08.272952, relative time: 0:00:00.000689
+Generated 10510000 programs, absolute time: 1:46:13.872740, relative time: 0:00:05.599788
+Generated 10520000 programs, absolute time: 1:46:19.486185, relative time: 0:00:05.613445
+Generated 10530000 programs, absolute time: 1:46:25.088985, relative time: 0:00:05.602800
+Generated 10540000 programs, absolute time: 1:46:30.764442, relative time: 0:00:05.675457
+Generated 10550000 programs, absolute time: 1:46:36.397158, relative time: 0:00:05.632716
+Generated 10560000 programs, absolute time: 1:46:42.028487, relative time: 0:00:05.631329
+Generated 10570000 programs, absolute time: 1:46:47.624571, relative time: 0:00:05.596084
+Generated 10580000 programs, absolute time: 1:46:53.252073, relative time: 0:00:05.627502
+Generated 10580000 programs, absolute time: 1:46:53.252733, relative time: 0:00:00.000660
+Generated 10590000 programs, absolute time: 1:46:58.842434, relative time: 0:00:05.589701
+Generated 10600000 programs, absolute time: 1:47:04.464098, relative time: 0:00:05.621664
+Generated 10610000 programs, absolute time: 1:47:10.117681, relative time: 0:00:05.653583
+Generated 10620000 programs, absolute time: 1:47:15.776881, relative time: 0:00:05.659200
+Generated 10630000 programs, absolute time: 1:47:21.413397, relative time: 0:00:05.636516
+Generated 10640000 programs, absolute time: 1:47:27.011990, relative time: 0:00:05.598593
+Generated 10650000 programs, absolute time: 1:47:32.600049, relative time: 0:00:05.588059
+Generated 10660000 programs, absolute time: 1:47:38.280564, relative time: 0:00:05.680515
+Generated 10670000 programs, absolute time: 1:47:43.892354, relative time: 0:00:05.611790
+Generated 10680000 programs, absolute time: 1:47:49.555263, relative time: 0:00:05.662909
+Generated 10690000 programs, absolute time: 1:47:57.172657, relative time: 0:00:07.617394
+Generated 10700000 programs, absolute time: 1:48:02.787013, relative time: 0:00:05.614356
+Generated 10710000 programs, absolute time: 1:48:08.415076, relative time: 0:00:05.628063
+Generated 10720000 programs, absolute time: 1:48:14.021216, relative time: 0:00:05.606140
+Generated 10730000 programs, absolute time: 1:48:21.661442, relative time: 0:00:07.640226
+Generated 10740000 programs, absolute time: 1:48:27.291983, relative time: 0:00:05.630541
+Generated 10750000 programs, absolute time: 1:48:32.949458, relative time: 0:00:05.657475
+Generated 10760000 programs, absolute time: 1:48:38.571925, relative time: 0:00:05.622467
+Generated 10770000 programs, absolute time: 1:48:44.197681, relative time: 0:00:05.625756
+Generated 10780000 programs, absolute time: 1:48:49.852490, relative time: 0:00:05.654809
+Generated 10790000 programs, absolute time: 1:48:55.435853, relative time: 0:00:05.583363
+Generated 10800000 programs, absolute time: 1:49:03.081688, relative time: 0:00:07.645835
+Generated 10810000 programs, absolute time: 1:49:08.687336, relative time: 0:00:05.605648
+Generated 10820000 programs, absolute time: 1:49:14.313027, relative time: 0:00:05.625691
+Generated 10830000 programs, absolute time: 1:49:21.953878, relative time: 0:00:07.640851
+Generated 10840000 programs, absolute time: 1:49:27.600549, relative time: 0:00:05.646671
+Generated 10850000 programs, absolute time: 1:49:33.215639, relative time: 0:00:05.615090
+Generated 10860000 programs, absolute time: 1:49:38.842967, relative time: 0:00:05.627328
+Generated 10870000 programs, absolute time: 1:49:44.436470, relative time: 0:00:05.593503
+Generated 10870000 programs, absolute time: 1:49:44.436587, relative time: 0:00:00.000117
+Generated 10880000 programs, absolute time: 1:49:50.096300, relative time: 0:00:05.659713
+Generated 10890000 programs, absolute time: 1:49:55.812357, relative time: 0:00:05.716057
+Generated 10900000 programs, absolute time: 1:50:02.259554, relative time: 0:00:06.447197
+Generated 10910000 programs, absolute time: 1:50:07.875289, relative time: 0:00:05.615735
+Generated 10920000 programs, absolute time: 1:50:13.516172, relative time: 0:00:05.640883
+Generated 10930000 programs, absolute time: 1:50:19.097118, relative time: 0:00:05.580946
+Generated 10940000 programs, absolute time: 1:50:24.714759, relative time: 0:00:05.617641
+Generated 10940000 programs, absolute time: 1:50:24.714870, relative time: 0:00:00.000111
+Generated 10950000 programs, absolute time: 1:50:30.320206, relative time: 0:00:05.605336
+Generated 10960000 programs, absolute time: 1:50:35.935821, relative time: 0:00:05.615615
+Generated 10970000 programs, absolute time: 1:50:41.580977, relative time: 0:00:05.645156
+Generated 10980000 programs, absolute time: 1:50:47.171063, relative time: 0:00:05.590086
+Generated 10990000 programs, absolute time: 1:50:52.782774, relative time: 0:00:05.611711
+Generated 11000000 programs, absolute time: 1:50:58.421566, relative time: 0:00:05.638792
+Generated 11010000 programs, absolute time: 1:51:04.067302, relative time: 0:00:05.645736
+Generated 11020000 programs, absolute time: 1:51:09.665952, relative time: 0:00:05.598650
+Generated 11030000 programs, absolute time: 1:51:15.302337, relative time: 0:00:05.636385
+Generated 11040000 programs, absolute time: 1:51:22.878059, relative time: 0:00:07.575722
+Generated 11050000 programs, absolute time: 1:51:28.499618, relative time: 0:00:05.621559
+Generated 11060000 programs, absolute time: 1:51:36.088513, relative time: 0:00:07.588895
+Generated 11070000 programs, absolute time: 1:51:41.687895, relative time: 0:00:05.599382
+Generated 11080000 programs, absolute time: 1:51:47.319263, relative time: 0:00:05.631368
+Generated 11090000 programs, absolute time: 1:51:52.939946, relative time: 0:00:05.620683
+Generated 11100000 programs, absolute time: 1:51:58.543031, relative time: 0:00:05.603085
+Generated 11110000 programs, absolute time: 1:52:04.198392, relative time: 0:00:05.655361
+Generated 11120000 programs, absolute time: 1:52:09.772238, relative time: 0:00:05.573846
+Generated 11130000 programs, absolute time: 1:52:15.321727, relative time: 0:00:05.549489
+Generated 11140000 programs, absolute time: 1:52:20.950216, relative time: 0:00:05.628489
+Generated 11150000 programs, absolute time: 1:52:26.569343, relative time: 0:00:05.619127
+Generated 11160000 programs, absolute time: 1:52:32.154330, relative time: 0:00:05.584987
+Generated 11160000 programs, absolute time: 1:52:32.155074, relative time: 0:00:00.000744
+Generated 11160000 programs, absolute time: 1:52:32.155175, relative time: 0:00:00.000101
+Generated 11170000 programs, absolute time: 1:52:37.782870, relative time: 0:00:05.627695
+Generated 11180000 programs, absolute time: 1:52:43.407067, relative time: 0:00:05.624197
+Generated 11190000 programs, absolute time: 1:52:49.025139, relative time: 0:00:05.618072
+Generated 11200000 programs, absolute time: 1:52:54.650521, relative time: 0:00:05.625382
+Generated 11210000 programs, absolute time: 1:53:00.241470, relative time: 0:00:05.590949
+Generated 11220000 programs, absolute time: 1:53:05.886948, relative time: 0:00:05.645478
+Generated 11230000 programs, absolute time: 1:53:11.470149, relative time: 0:00:05.583201
+Generated 11240000 programs, absolute time: 1:53:17.076906, relative time: 0:00:05.606757
+Generated 11250000 programs, absolute time: 1:53:22.715200, relative time: 0:00:05.638294
+Generated 11260000 programs, absolute time: 1:53:28.335200, relative time: 0:00:05.620000
+Generated 11270000 programs, absolute time: 1:53:33.952178, relative time: 0:00:05.616978
+Generated 11280000 programs, absolute time: 1:53:39.601119, relative time: 0:00:05.648941
+Generated 11290000 programs, absolute time: 1:53:45.208099, relative time: 0:00:05.606980
+Generated 11300000 programs, absolute time: 1:53:50.781219, relative time: 0:00:05.573120
+Generated 11310000 programs, absolute time: 1:53:56.384946, relative time: 0:00:05.603727
+Generated 11320000 programs, absolute time: 1:54:02.028679, relative time: 0:00:05.643733
+Generated 11330000 programs, absolute time: 1:54:07.680343, relative time: 0:00:05.651664
+Generated 11340000 programs, absolute time: 1:54:13.284884, relative time: 0:00:05.604541
+Generated 11350000 programs, absolute time: 1:54:18.841885, relative time: 0:00:05.557001
+Generated 11360000 programs, absolute time: 1:54:24.493118, relative time: 0:00:05.651233
+Generated 11370000 programs, absolute time: 1:54:30.116088, relative time: 0:00:05.622970
+Generated 11380000 programs, absolute time: 1:54:35.750186, relative time: 0:00:05.634098
+Generated 11390000 programs, absolute time: 1:54:41.361813, relative time: 0:00:05.611627
+Generated 11400000 programs, absolute time: 1:54:47.002267, relative time: 0:00:05.640454
+Generated 11410000 programs, absolute time: 1:54:52.622024, relative time: 0:00:05.619757
+Generated 11410000 programs, absolute time: 1:54:52.622156, relative time: 0:00:00.000132
+Generated 11420000 programs, absolute time: 1:54:58.245000, relative time: 0:00:05.622844
+Generated 11430000 programs, absolute time: 1:55:03.832202, relative time: 0:00:05.587202
+Generated 11440000 programs, absolute time: 1:55:09.453820, relative time: 0:00:05.621618
+Generated 11450000 programs, absolute time: 1:55:15.118935, relative time: 0:00:05.665115
+Generated 11460000 programs, absolute time: 1:55:20.708533, relative time: 0:00:05.589598
+Generated 11470000 programs, absolute time: 1:55:26.380160, relative time: 0:00:05.671627
+Generated 11480000 programs, absolute time: 1:55:32.031153, relative time: 0:00:05.650993
+Generated 11490000 programs, absolute time: 1:55:37.635926, relative time: 0:00:05.604773
+Generated 11500000 programs, absolute time: 1:55:43.303730, relative time: 0:00:05.667804
+Generated 11500000 programs, absolute time: 1:55:43.303854, relative time: 0:00:00.000124
+Generated 11500000 programs, absolute time: 1:55:43.304657, relative time: 0:00:00.000803
+Generated 11510000 programs, absolute time: 1:55:48.915105, relative time: 0:00:05.610448
+Generated 11520000 programs, absolute time: 1:55:54.587312, relative time: 0:00:05.672207
+Generated 11530000 programs, absolute time: 1:56:00.276414, relative time: 0:00:05.689102
+Generated 11540000 programs, absolute time: 1:56:05.905182, relative time: 0:00:05.628768
+Generated 11550000 programs, absolute time: 1:56:11.511094, relative time: 0:00:05.605912
+Generated 11560000 programs, absolute time: 1:56:17.210076, relative time: 0:00:05.698982
+Generated 11570000 programs, absolute time: 1:56:22.815224, relative time: 0:00:05.605148
+Generated 11580000 programs, absolute time: 1:56:28.447492, relative time: 0:00:05.632268
+Generated 11590000 programs, absolute time: 1:56:34.097963, relative time: 0:00:05.650471
+Generated 11600000 programs, absolute time: 1:56:41.749862, relative time: 0:00:07.651899
+Generated 11610000 programs, absolute time: 1:56:49.409425, relative time: 0:00:07.659563
+Generated 11620000 programs, absolute time: 1:56:55.051517, relative time: 0:00:05.642092
+Generated 11630000 programs, absolute time: 1:57:00.651847, relative time: 0:00:05.600330
+Generated 11640000 programs, absolute time: 1:57:06.308002, relative time: 0:00:05.656155
+Generated 11640000 programs, absolute time: 1:57:06.308170, relative time: 0:00:00.000168
+Generated 11650000 programs, absolute time: 1:57:11.956803, relative time: 0:00:05.648633
+Generated 11660000 programs, absolute time: 1:57:17.604363, relative time: 0:00:05.647560
+Generated 11660000 programs, absolute time: 1:57:17.604510, relative time: 0:00:00.000147
+Generated 11670000 programs, absolute time: 1:57:23.314080, relative time: 0:00:05.709570
+Generated 11670000 programs, absolute time: 1:57:23.314445, relative time: 0:00:00.000365
+Generated 11680000 programs, absolute time: 1:57:28.974287, relative time: 0:00:05.659842
+Generated 11690000 programs, absolute time: 1:57:34.597850, relative time: 0:00:05.623563
+Generated 11700000 programs, absolute time: 1:57:40.228988, relative time: 0:00:05.631138
+Generated 11710000 programs, absolute time: 1:57:45.806379, relative time: 0:00:05.577391
+Generated 11720000 programs, absolute time: 1:57:51.440476, relative time: 0:00:05.634097
+Generated 11730000 programs, absolute time: 1:57:57.112280, relative time: 0:00:05.671804
+Generated 11740000 programs, absolute time: 1:58:02.739156, relative time: 0:00:05.626876
+Generated 11750000 programs, absolute time: 1:58:08.345343, relative time: 0:00:05.606187
+Generated 11760000 programs, absolute time: 1:58:13.963274, relative time: 0:00:05.617931
+Generated 11770000 programs, absolute time: 1:58:19.600015, relative time: 0:00:05.636741
+Generated 11780000 programs, absolute time: 1:58:25.270232, relative time: 0:00:05.670217
+Generated 11780000 programs, absolute time: 1:58:25.270367, relative time: 0:00:00.000135
+Generated 11790000 programs, absolute time: 1:58:30.930467, relative time: 0:00:05.660100
+Generated 11800000 programs, absolute time: 1:58:36.574009, relative time: 0:00:05.643542
+Generated 11810000 programs, absolute time: 1:58:42.204412, relative time: 0:00:05.630403
+Generated 11820000 programs, absolute time: 1:58:47.812723, relative time: 0:00:05.608311
+Generated 11830000 programs, absolute time: 1:58:53.527600, relative time: 0:00:05.714877
+Generated 11830000 programs, absolute time: 1:58:53.528248, relative time: 0:00:00.000648
+Generated 11840000 programs, absolute time: 1:58:59.201377, relative time: 0:00:05.673129
+Generated 11850000 programs, absolute time: 1:59:04.784736, relative time: 0:00:05.583359
+Generated 11860000 programs, absolute time: 1:59:10.397218, relative time: 0:00:05.612482
+Generated 11870000 programs, absolute time: 1:59:16.084432, relative time: 0:00:05.687214
+Generated 11880000 programs, absolute time: 1:59:21.685514, relative time: 0:00:05.601082
+Generated 11890000 programs, absolute time: 1:59:27.299511, relative time: 0:00:05.613997
+Generated 11900000 programs, absolute time: 1:59:33.024161, relative time: 0:00:05.724650
+Generated 11900000 programs, absolute time: 1:59:33.025011, relative time: 0:00:00.000850
+Generated 11910000 programs, absolute time: 1:59:38.621137, relative time: 0:00:05.596126
+Generated 11920000 programs, absolute time: 1:59:44.193894, relative time: 0:00:05.572757
+Generated 11930000 programs, absolute time: 1:59:49.845274, relative time: 0:00:05.651380
+Generated 11940000 programs, absolute time: 1:59:55.429227, relative time: 0:00:05.583953
+Generated 11950000 programs, absolute time: 2:00:01.016491, relative time: 0:00:05.587264
+Generated 11960000 programs, absolute time: 2:00:06.746771, relative time: 0:00:05.730280
+Generated 11970000 programs, absolute time: 2:00:12.419207, relative time: 0:00:05.672436
+Generated 11980000 programs, absolute time: 2:00:18.049777, relative time: 0:00:05.630570
+Generated 11990000 programs, absolute time: 2:00:23.658255, relative time: 0:00:05.608478
diff --git a/datasets/dataset-2/slurm_51080801.out b/datasets/dataset-2/slurm_51080801.out
new file mode 100644
index 0000000..5670cbe
--- /dev/null
+++ b/datasets/dataset-2/slurm_51080801.out
@@ -0,0 +1,2 @@
+percentage of timeouts: 0.00%
+percentage of zero divisions: 5.66%
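For context, the two percentages above come from executing the generated programs and tallying failure modes. Below is a minimal sketch of how such a tally could be computed; it is an illustration rather than the project's actual script, and the alarm-based timeout assumes a Unix host (which a Slurm job is).

```python
import contextlib
import io
import signal

def run_snippet(src: str, timeout_s: int = 1) -> str:
    """Classify one generated program as 'ok', 'zero divisions', or 'timeouts'."""
    def _alarm(signum, frame):
        raise TimeoutError
    signal.signal(signal.SIGALRM, _alarm)  # Unix-only alarm-based timeout
    signal.alarm(timeout_s)
    try:
        with contextlib.redirect_stdout(io.StringIO()):  # silence the program's own prints
            exec(src, {})
        return "ok"
    except ZeroDivisionError:
        return "zero divisions"
    except TimeoutError:
        return "timeouts"
    finally:
        signal.alarm(0)

with open("sample_data.txt") as f:  # any file of \n\n-separated programs
    programs = [p for p in f.read().split("\n\n") if p.strip()]

outcomes = [run_snippet(p) for p in programs]
for label in ("timeouts", "zero divisions"):
    print(f"percentage of {label}: {100 * outcomes.count(label) / len(outcomes):.2f}%")
```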
diff --git a/datasets/dataset-3/.readme.md b/datasets/dataset-3/.readme.md
new file mode 100644
index 0000000..e60880f
--- /dev/null
+++ b/datasets/dataset-3/.readme.md
@@ -0,0 +1,8 @@
+# DATA DESCRIPTION:
+
+# DATA OBTENTION:
+
+
+# META-DATA:
+
+# DATA LOCATION:
diff --git a/datasets/dataset-3/datapreps/dataprep-1/data-preping-atmpt-1.log b/datasets/dataset-3/datapreps/dataprep-1/data-preping-atmpt-1.log
new file mode 100644
index 0000000..2bc3c01
--- /dev/null
+++ b/datasets/dataset-3/datapreps/dataprep-1/data-preping-atmpt-1.log
@@ -0,0 +1,46 @@
+
+
+
+Saving the data-preping-numpy-random state:
+ --> saving it
+ --> freeing its memory
+
+Loading the dataset:
+
+Get all the unique characters that occur in this text:
+ --> all the unique characters: '\t\n !#%()*+,-./0123456789:<=>abcdefghijklmnopqrstuvwxyz'
+ --> vocab size: 54
+
+Create a mapping from characters to integers:
+
+Save the meta information as well, to help us encode/decode later:
+ --> freeing its memory
+
+Split by examples using \n\n:
+ --> splitting
+ --> freeing data memory
+ --> total number of examples: 200,000,000
+
+
+Creating the train.txt, val.txt and test.txt:
+ --> shuffling examples
+ --> creating the train_examples
+ --> train_examples has 160000000 examples
+ --> creating the train_data
+ --> train_data has 30302247481 tokens
+ --> writing the train_data to train.txt
+ --> creating the val_examples
+ --> val_examples has 20000000 examples
+ --> creating the val_data
+ --> val_data has 3787098902 tokens
+ --> writing the val_data to val.txt
+ --> creating the test_examples
+ --> test_examples has 20000000 examples
+ --> creating the test_data
+ --> test_data has 3788161040 tokens
+ --> writing the test_data to test.txt
+ --> freeing examples memory
+
+We define the encoding function:
+
+Reading and encoding train.txt directly to binary:
diff --git a/datasets/dataset-3/datapreps/dataprep-1/data-preping-atmpt-1.py b/datasets/dataset-3/datapreps/dataprep-1/data-preping-atmpt-1.py
new file mode 100644
index 0000000..c4c17bc
--- /dev/null
+++ b/datasets/dataset-3/datapreps/dataprep-1/data-preping-atmpt-1.py
@@ -0,0 +1,198 @@
+## Data prepping (on Kindi)
+DIR = "/data/yb2618/Tiny-Language-Models-Framework/datasets/dataset-3/datapreps/dataprep-1/"
+
+
+## Imports
+import pickle
+import numpy as np
+import gc
+import struct
+import time
+
+## Logging boilerplate
+log_file = open(DIR+"data-preping-atmpt-1.log", "w")
+pbar_recept_string = " " * 200 + "\n"
+log_file.write(pbar_recept_string)
+log_file.write(pbar_recept_string)
+log_file.flush()
+def log(s:str, p_level=None):
+ if p_level == 1:
+ log_file.seek(0,0)
+ log_file.write(pbar_recept_string)
+ log_file.seek(0,0)
+ log_file.write(s)
+ log_file.seek(0,2)
+ elif p_level == 2:
+ log_file.seek(len(pbar_recept_string), 0)
+ log_file.write(pbar_recept_string)
+ log_file.seek(len(pbar_recept_string), 0)
+ log_file.write(s)
+ log_file.seek(0,2)
+ else:
+ if s[0].upper() == s[0]:
+ start = "\n"
+ end = ":"
+ else:
+ start = " --> "
+ end = ""
+ log_file.write(start + s + end + "\n")
+ log_file.flush()
+
+
+## Convert seconds to days, hours, minutes, seconds
+def convert_seconds(seconds:float):
+ # ignoring the sub seconds
+ seconds = int(seconds)
+ days, seconds = divmod(seconds, 86400)
+ hours, seconds = divmod(seconds, 3600)
+ minutes, seconds = divmod(seconds, 60)
+ return (days, hours, minutes, seconds)
+
+
+## Saving the numpy random state
+log("Saving the data-preping-numpy-random state")
+log("saving it")
+np_random_state = np.random.get_state()
+with open(DIR+"data-preping-np-random-state.bin", "wb") as f:
+ pickle.dump(np_random_state, f)
+log("freeing its memory")
+del np_random_state
+gc.collect()
+
+
+## Loading the dataset
+log("Loading the dataset")
+with open("/data/hb3020/datasets/200M-simplified/data-200M-simplified.txt", "r") as f:
+ data = f.read()
+
+
+## Get all the unique characters that occur in this text
+log("Get all the unique characters that occur in this text")
+chars = sorted(list(set(data)))
+vocab_size = len(chars)
+log("all the unique characters: " + repr(''.join(chars)))
+log(f"vocab size: {vocab_size:,}")
+
+
+## Create a mapping from characters to integers
+log("Create a mapping from characters to integers")
+stoi = { ch:i for i,ch in enumerate(chars) }
+itos = { i:ch for i,ch in enumerate(chars) }
+
+
+## Save the meta information as well, to help us encode/decode later
+log("Save the meta information as well, to help us encode/decode later")
+meta = {
+ 'vocab_size': vocab_size,
+ 'itos': itos,
+ 'stoi': stoi,
+}
+with open(DIR+'data/meta.pkl', 'wb') as f:
+ pickle.dump(meta, f)
+log("freeing its memory")
+del meta
+gc.collect()
+
+
+## Split by examples using \n\n
+log("Split by examples using \\n\\n")
+log("splitting")
+examples = data.split("\n\n")[:-1]
+log("freeing data memory")
+del data
+gc.collect()
+n = len(examples)
+log(f"total number of examples: {n:,}\n")
+
+
+## Creating the train.txt, val.txt and test.txt
+log("Creating the train.txt, val.txt and test.txt")
+log("shuffling examples")
+np.random.shuffle(examples)
+
+log("creating the train_examples")
+train_examples = examples[:int(n*0.8)]
+log(f"train_examples has {len(train_examples)} examples")
+log("creating the train_data")
+train_data = "\n\n".join(train_examples)
+del train_examples
+log(f"train_data has {(train_tokens := len(train_data))} tokens")
+log("writing the train_data to train.txt")
+with open(DIR+"data/train.txt", 'w') as f:
+ f.write(train_data)
+del train_data
+
+log("creating the val_examples")
+val_examples = examples[int(n*0.8):int(n*0.9)]
+log(f"val_examples has {len(val_examples)} examples")
+log("creating the val_data")
+val_data = "\n\n".join(val_examples)
+del val_examples
+log(f"val_data has {(val_tokens := len(val_data))} tokens")
+log("writing the val_data to val.txt")
+with open(DIR+"data/val.txt", 'w') as f:
+ f.write(val_data)
+del val_data
+
+log("creating the test_examples")
+test_examples = examples[int(n*0.9):]
+log(f"test_examples has {len(test_examples)} examples")
+log("creating the test_data")
+test_data = "\n\n".join(test_examples)
+del test_examples
+log(f"test_data has {len(test_data)} tokens")
+log("writing the test_data to test.txt")
+with open(DIR+"data/test.txt", 'w') as f:
+ f.write(test_data)
+del test_data
+
+log("freeing examples memory")
+del examples
+gc.collect()
+
+## We read the stoi from the meta.pkl
+with open(DIR+"data/meta.pkl", "rb") as f:
+ meta = pickle.load(f)
+stoi = meta["stoi"]
+del meta
+
+## We define the encoding function
+log("We define the encoding function")
+def encode_generator(s:str):
+ for c in s:
+ yield stoi[c]
+
+log("Reading and encoding train.txt directly to binary")
+with open(DIR+"data/train.txt", "r") as f, open(DIR+"data/train.bin", "wb") as bin_file:
+    chunk_size = 1024 * 1024 * 1000 # ~1 GB (1000 MiB) of characters per read
+    max_iters = (train_tokens + chunk_size - 1) // chunk_size # ceil, so the final partial chunk is counted
+ i = 0
+ while True:
+ past = time.time()
+ chunk = f.read(chunk_size)
+ if not chunk:
+ break
+ for token in encode_generator(chunk):
+ bin_file.write(struct.pack('H', token)) # 'H' stands for unsigned short (2 bytes)
+ i = i+1
+ present = time.time()
+        # fixed: use the loop counter i (already incremented above), not the builtin iter
+        log(f"|ITERS: {i} / {max_iters} | COMP: {i/max_iters * 100:.2f}% | RATE: {1/(present-past):.2f} it./s | SPD: {present - past :.4f} s/it.| ERT: {convert_seconds((max_iters-i) * (present-past))}", p_level = 1)
+
+log("Reading and encoding val.txt directly to binary")
+with open(DIR+"data/val.txt", "r") as f, open(DIR+"data/val.bin", "wb") as bin_file:
+    chunk_size = 1024 * 1024 * 1000 # ~1 GB (1000 MiB) of characters per read
+    max_iters = (val_tokens + chunk_size - 1) // chunk_size # ceil, so the final partial chunk is counted
+ i = 0
+ while True:
+ past = time.time()
+ chunk = f.read(chunk_size)
+ if not chunk:
+ break
+ for token in encode_generator(chunk):
+ bin_file.write(struct.pack('H', token)) # 'H' stands for unsigned short (2 bytes)
+ i = i+1
+ present = time.time()
+ log(f"|ITERS: {iter+1} / {max_iters} | COMP: {(i+1)/max_iters * 100:.2f}% | RATE: {1/(present-past):.2f} it./s | SPD: {present - past :.4f} s/it.| ERT: {convert_seconds((max_iters-iter-1) * (present-past))}", p_level = 2)
+
+log_file.close()
\ No newline at end of file
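A quick way to sanity-check the encoding above: the vocabulary has only 54 characters, so every token id fits in the 2-byte unsigned short written by `struct.pack('H', ...)`, and the resulting `.bin` can be read back as `uint16` and decoded with the `itos` table from `meta.pkl`. A small sketch (paths follow the `data/` layout used by this script):

```python
import pickle
import numpy as np

with open("data/meta.pkl", "rb") as f:
    itos = pickle.load(f)["itos"]  # id -> character mapping saved by the script

tokens = np.fromfile("data/train.bin", dtype=np.uint16, count=64)  # peek at the first 64 ids
print("".join(itos[int(t)] for t in tokens))  # should equal the first 64 characters of train.txt
```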
diff --git a/datasets/dataset-3/datapreps/dataprep-1/data-preping-atmpt-2.log b/datasets/dataset-3/datapreps/dataprep-1/data-preping-atmpt-2.log
new file mode 100644
index 0000000..2f0b95b
--- /dev/null
+++ b/datasets/dataset-3/datapreps/dataprep-1/data-preping-atmpt-2.log
@@ -0,0 +1,12 @@
+|ITERS: 30 / 28 | COMP: 107.14% | RATE: 0.00 it./s | SPD: 274.8309 s/it.| ERT: (-1, 23, 50, 51)
+|ITERS: 5 / 3 | COMP: 166.67% | RATE: 0.01 it./s | SPD: 182.1442 s/it.| ERT: (-1, 23, 53, 56)
+
+Getting back the number of tokens in the .txt files:
+ --> train_tokens 30302247481
+ --> val_tokens 3787098902
+
+We define the encoding function:
+
+Reading and encoding train.txt directly to binary:
+
+Reading and encoding val.txt directly to binary:
diff --git a/datasets/dataset-3/datapreps/dataprep-1/data-preping-atmpt-2.py b/datasets/dataset-3/datapreps/dataprep-1/data-preping-atmpt-2.py
new file mode 100644
index 0000000..21354a1
--- /dev/null
+++ b/datasets/dataset-3/datapreps/dataprep-1/data-preping-atmpt-2.py
@@ -0,0 +1,109 @@
+## Data prepping (on Kindi)
+DIR = "/data/yb2618/Tiny-Language-Models-Framework/datasets/dataset-3/datapreps/dataprep-1/"
+
+
+## Imports
+import pickle
+import struct
+import time
+import os
+
+## Logging boilerplate
+log_file = open(DIR+"data-preping-atmpt-2.log", "w")
+pbar_recept_string = " " * 200 + "\n"
+log_file.write(pbar_recept_string)
+log_file.write(pbar_recept_string)
+log_file.flush()
+def log(s:str, p_level=None):
+ if p_level == 1:
+ log_file.seek(0,0)
+ log_file.write(pbar_recept_string)
+ log_file.seek(0,0)
+ log_file.write(s)
+ log_file.seek(0,2)
+ elif p_level == 2:
+ log_file.seek(len(pbar_recept_string), 0)
+ log_file.write(pbar_recept_string)
+ log_file.seek(len(pbar_recept_string), 0)
+ log_file.write(s)
+ log_file.seek(0,2)
+ else:
+ if s[0].upper() == s[0]:
+ start = "\n"
+ end = ":"
+ else:
+ start = " --> "
+ end = ""
+ log_file.write(start + s + end + "\n")
+ log_file.flush()
+
+
+## Convert seconds to days, hours, minutes, seconds
+def convert_seconds(seconds:float):
+ # ignoring the sub seconds
+ seconds = int(seconds)
+ days, seconds = divmod(seconds, 86400)
+ hours, seconds = divmod(seconds, 3600)
+ minutes, seconds = divmod(seconds, 60)
+ return (days, hours, minutes, seconds)
+
+
+## We read the stoi from the meta.pkl
+with open(DIR+"data/meta.pkl", "rb") as f:
+ meta = pickle.load(f)
+stoi = meta["stoi"]
+del meta
+
+
+## Getting back the number of tokens in the .txt files
+log("Getting back the number of tokens in the .txt files")
+train_tokens = os.path.getsize(DIR+"data/train.txt")
+log(f"train_tokens {train_tokens}")
+val_tokens = os.path.getsize(DIR+"data/val.txt")
+log(f"val_tokens {val_tokens}")
+
+## We define the encoding function
+log("We define the encoding function")
+def encode_generator(s:str):
+ for c in s:
+ yield stoi[c]
+
+log("Reading and encoding train.txt directly to binary")
+with open(DIR+"data/train.txt", "r") as f, open(DIR+"data/train.bin", "wb") as bin_file:
+    chunk_size = 1024 * 1024 * 1000 # ~1 GB (1000 MiB) of characters per read
+    max_iters = (train_tokens + chunk_size - 1) // chunk_size # ceil, so the final partial chunk is counted
+ i = 0
+ while True:
+ past = time.time()
+ chunk = f.read(chunk_size)
+ if not chunk:
+ break
+ for token in encode_generator(chunk):
+ # past2 = time.time()
+ bin_file.write(struct.pack('H', token)) # 'H' stands for unsigned short (2 bytes)
+ # present2 = time.time()
+ # log(f"|ITERS: {j+1} / {chunk_size:,} | COMP: {(j+1)/chunk_size * 100:.2f}% | RATE: {1/(present2-past2):.2f} it./s | SPD: {present2 - past2 :.4f} s/it.| ERT: {convert_seconds((chunk_size-j-1) * (present2-past2))}", p_level = 2)
+
+ i = i+1
+ present = time.time()
+        # i is already incremented above, so report i (not i+1); attempt 1's iter bug is fixed here
+        log(f"|ITERS: {i} / {max_iters} | COMP: {i/max_iters * 100:.2f}% | RATE: {1/(present-past):.2f} it./s | SPD: {present - past :.4f} s/it.| ERT: {convert_seconds((max_iters-i) * (present-past))}", p_level = 1)
+
+log("Reading and encoding val.txt directly to binary")
+with open(DIR+"data/val.txt", "r") as f, open(DIR+"data/val.bin", "wb") as bin_file:
+    chunk_size = 1024 * 1024 * 1000 # ~1 GB (1000 MiB) of characters per read
+    max_iters = (val_tokens + chunk_size - 1) // chunk_size # ceil, so the final partial chunk is counted
+ i = 0
+ while True:
+ past = time.time()
+ chunk = f.read(chunk_size)
+ if not chunk:
+ break
+ for token in encode_generator(chunk):
+ bin_file.write(struct.pack('H', token)) # 'H' stands for unsigned short (2 bytes)
+ i = i+1
+ present = time.time()
+        # i is already incremented above, so report i (not i+1)
+        log(f"|ITERS: {i} / {max_iters} | COMP: {i/max_iters * 100:.2f}% | RATE: {1/(present-past):.2f} it./s | SPD: {present - past :.4f} s/it.| ERT: {convert_seconds((max_iters-i) * (present-past))}", p_level = 2)
+
+log_file.close()
\ No newline at end of file
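Both attempts share the in-place progress trick inside `log()`: two fixed-width lines are reserved at the top of the log file, and each `p_level` update seeks back and overwrites its own slot rather than appending, which is why the finished logs above begin with two progress lines. A condensed sketch of the mechanism:

```python
RESERVED = " " * 40 + "\n"  # fixed-width slot, analogous to pbar_recept_string

with open("progress.log", "w") as f:
    f.write(RESERVED)  # slot for p_level=1 updates
    f.write(RESERVED)  # slot for p_level=2 updates
    for i in range(1, 4):
        f.seek(0)  # rewind to the first slot
        f.write(f"train encode: {i}/3".ljust(len(RESERVED) - 1))  # overwrite, keep the trailing newline
        f.seek(0, 2)  # jump back to the end for ordinary appended lines
        f.write(f"finished chunk {i}\n")
        f.flush()
```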
diff --git a/datasets/dataset-3/datapreps/dataprep-1/data-preping-np-random-state.bin b/datasets/dataset-3/datapreps/dataprep-1/data-preping-np-random-state.bin
new file mode 100644
index 0000000..eb174e3
Binary files /dev/null and b/datasets/dataset-3/datapreps/dataprep-1/data-preping-np-random-state.bin differ
diff --git a/datasets/dataset-3/datapreps/dataprep-1/data/.gitkeep b/datasets/dataset-3/datapreps/dataprep-1/data/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/generalization/.ipynb_checkpoints/tokenizing-checkpoint.py b/generalization/.ipynb_checkpoints/tokenizing-checkpoint.py
new file mode 100644
index 0000000..14a446e
--- /dev/null
+++ b/generalization/.ipynb_checkpoints/tokenizing-checkpoint.py
@@ -0,0 +1,82 @@
+import argparse
+import pandas as pd
+from datasets import Dataset
+from transformers import AutoTokenizer
+
+import transformers
+
+transformers.logging.set_verbosity_error()
+
+def tokenize_data(train_file="../data/train.txt",
+ test_file="../data/val.txt",
+ tokenizer_name="codellama/CodeLlama-7b-hf",
+ train_output_dir="data/tokenized_train",
+ val_output_dir="data/tokenized_val"):
+ print("Tokenizing data...")
+
+ # Read the training and test data
+ with open(train_file) as f:
+ train_data = f.read()
+
+ with open(test_file) as f:
+ test_data = f.read()
+
+ # Split the snippets into individual examples
+ train_snippets = train_data.split('\n\n')
+ test_snippets = test_data.split('\n\n')
+
+ # Create datasets from the snippets
+ train_dataset = Dataset.from_pandas(pd.DataFrame({'snippets': train_snippets}))
+ eval_dataset = Dataset.from_pandas(pd.DataFrame({'snippets': test_snippets}))
+
+ # Load the tokenizer
+ print("Loading tokenizer...")
+ tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
+ tokenizer.add_eos_token = True
+ tokenizer.pad_token_id = 0
+ tokenizer.padding_side = "left"
+
+ # Function to tokenize a prompt
+ def tokenize(prompt):
+ result = tokenizer(
+ prompt,
+ truncation=True,
+ max_length=512,
+ padding=False,
+ return_tensors=None,
+ )
+
+ # For self-supervised learning, labels are also the inputs
+ result["labels"] = result["input_ids"].copy()
+
+ return result
+
+ # Function to generate and tokenize a prompt
+ def generate_and_tokenize_prompt(data_point):
+ full_prompt = data_point["snippets"]
+
+ return tokenize(full_prompt)
+
+ # Tokenize the training and validation datasets
+ print("Tokenizing datasets...")
+ tokenized_train_dataset = train_dataset.map(generate_and_tokenize_prompt)
+ tokenized_val_dataset = eval_dataset.map(generate_and_tokenize_prompt)
+
+ # Save the tokenized datasets to disk
+ print(f"Saving tokenized datasets to {train_output_dir} and {val_output_dir}...")
+ tokenized_train_dataset.save_to_disk(train_output_dir)
+ tokenized_val_dataset.save_to_disk(val_output_dir)
+
+ print("Tokenization complete.")
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Tokenize data for language model training.")
+ parser.add_argument("--train_file", type=str, default="../data/train.txt", help="Path to the training file")
+ parser.add_argument("--test_file", type=str, default="../data/val.txt", help="Path to the test file")
+ parser.add_argument("--tokenizer_name", type=str, default="codellama/CodeLlama-7b-hf", help="Name or path of the tokenizer")
+ parser.add_argument("--train_output_dir", type=str, default="data/tokenized_train", help="Path to save the tokenized training dataset")
+ parser.add_argument("--val_output_dir", type=str, default="data/tokenized_val", help="Path to save the tokenized validation dataset")
+
+ args = parser.parse_args()
+
+ tokenize_data(args.train_file, args.test_file, args.tokenizer_name, args.train_output_dir, args.val_output_dir)
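One detail worth spelling out: `tokenize()` copies `input_ids` into `labels` because causal-LM fine-tuning is self-supervised; the model predicts each token from its prefix, and the `Trainer` shifts the labels internally. The saved datasets can be inspected with a few lines (a sketch using the output paths above):

```python
from datasets import load_from_disk

ds = load_from_disk("data/tokenized_train")
row = ds[0]
print(row["snippets"][:80])                    # the raw code snippet
print(len(row["input_ids"]), row["input_ids"][:10])
assert row["labels"] == row["input_ids"]       # self-supervised: labels mirror the inputs
```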
diff --git a/generalization/data/tokenized_train/data-00000-of-00001.arrow b/generalization/data/tokenized_train/data-00000-of-00001.arrow
new file mode 100644
index 0000000..88be929
Binary files /dev/null and b/generalization/data/tokenized_train/data-00000-of-00001.arrow differ
diff --git a/generalization/data/tokenized_train/dataset_info.json b/generalization/data/tokenized_train/dataset_info.json
new file mode 100644
index 0000000..c5ed9a8
--- /dev/null
+++ b/generalization/data/tokenized_train/dataset_info.json
@@ -0,0 +1,33 @@
+{
+ "citation": "",
+ "description": "",
+ "features": {
+ "snippets": {
+ "dtype": "string",
+ "_type": "Value"
+ },
+ "input_ids": {
+ "feature": {
+ "dtype": "int32",
+ "_type": "Value"
+ },
+ "_type": "Sequence"
+ },
+ "attention_mask": {
+ "feature": {
+ "dtype": "int8",
+ "_type": "Value"
+ },
+ "_type": "Sequence"
+ },
+ "labels": {
+ "feature": {
+ "dtype": "int64",
+ "_type": "Value"
+ },
+ "_type": "Sequence"
+ }
+ },
+ "homepage": "",
+ "license": ""
+}
\ No newline at end of file
diff --git a/generalization/data/tokenized_train/state.json b/generalization/data/tokenized_train/state.json
new file mode 100644
index 0000000..a9079c7
--- /dev/null
+++ b/generalization/data/tokenized_train/state.json
@@ -0,0 +1,13 @@
+{
+ "_data_files": [
+ {
+ "filename": "data-00000-of-00001.arrow"
+ }
+ ],
+ "_fingerprint": "0b9633717850bc07",
+ "_format_columns": null,
+ "_format_kwargs": {},
+ "_format_type": null,
+ "_output_all_columns": false,
+ "_split": null
+}
\ No newline at end of file
diff --git a/generalization/data/tokenized_val/data-00000-of-00001.arrow b/generalization/data/tokenized_val/data-00000-of-00001.arrow
new file mode 100644
index 0000000..7305205
Binary files /dev/null and b/generalization/data/tokenized_val/data-00000-of-00001.arrow differ
diff --git a/generalization/data/tokenized_val/dataset_info.json b/generalization/data/tokenized_val/dataset_info.json
new file mode 100644
index 0000000..c5ed9a8
--- /dev/null
+++ b/generalization/data/tokenized_val/dataset_info.json
@@ -0,0 +1,33 @@
+{
+ "citation": "",
+ "description": "",
+ "features": {
+ "snippets": {
+ "dtype": "string",
+ "_type": "Value"
+ },
+ "input_ids": {
+ "feature": {
+ "dtype": "int32",
+ "_type": "Value"
+ },
+ "_type": "Sequence"
+ },
+ "attention_mask": {
+ "feature": {
+ "dtype": "int8",
+ "_type": "Value"
+ },
+ "_type": "Sequence"
+ },
+ "labels": {
+ "feature": {
+ "dtype": "int64",
+ "_type": "Value"
+ },
+ "_type": "Sequence"
+ }
+ },
+ "homepage": "",
+ "license": ""
+}
\ No newline at end of file
diff --git a/generalization/data/tokenized_val/state.json b/generalization/data/tokenized_val/state.json
new file mode 100644
index 0000000..0cb7a77
--- /dev/null
+++ b/generalization/data/tokenized_val/state.json
@@ -0,0 +1,13 @@
+{
+ "_data_files": [
+ {
+ "filename": "data-00000-of-00001.arrow"
+ }
+ ],
+ "_fingerprint": "cc8d31de4b82da8d",
+ "_format_columns": null,
+ "_format_kwargs": {},
+ "_format_type": null,
+ "_output_all_columns": false,
+ "_split": null
+}
\ No newline at end of file
diff --git a/generalization/demonstration.ipynb b/generalization/demonstration.ipynb
new file mode 100644
index 0000000..e1dbb26
--- /dev/null
+++ b/generalization/demonstration.ipynb
@@ -0,0 +1,312 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "176621a1-0778-4a21-9de9-dd2ebefef5a6",
+ "metadata": {},
+ "source": [
+ "# Evaluating Arithmetic Operations Using Our Finetuned Large Language Model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "26d10bd3-660c-43df-9719-43fa71b119ea",
+ "metadata": {},
+ "source": [
+ "This notebook demonstrates how to use our fine-tuned Code Llama model to evaluate simple arithmetic operations in Python code snippets. The process involves loading the pre-trained model, preparing the tokenizer, and defining functions to evaluate code examples. We then evaluate various code snippets to observe the model's generated results. This workflow highlights the capabilities of the Code Llama model in executing arithmetic expressions."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ac948737-336a-4d73-ba56-64601000e561",
+ "metadata": {},
+ "source": [
+ "## Import Necessary Libraries"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7654d2f3-bae2-4936-b86c-a5a6b0e2ed51",
+ "metadata": {},
+ "source": [
+ "Import essential libraries for model loading, evaluation, and tokenization."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "4c3d4f58-731a-47a8-9121-4689acc8e2c6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "import warnings\n",
+ "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
+ "from peft import PeftModel\n",
+ "\n",
+ "# Ignore all warnings\n",
+ "warnings.filterwarnings(\"ignore\")\n",
+ "\n",
+ "import transformers\n",
+ "\n",
+ "transformers.logging.set_verbosity_error()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "edd801aa-d88d-42b0-bfac-3901489f7642",
+ "metadata": {},
+ "source": [
+ "## Load the Pre-trained Model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "74874a67-6e25-4395-81fa-768c630db298",
+ "metadata": {},
+ "source": [
+ "Load the pre-trained Code Llama model in 8-bit precision."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "b1d25be0-8aed-4825-976f-f066aa45eb8d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "9e7fc7c9fd3b422fa5413277b16ac29e",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Loading checkpoint shards: 0%| | 0/2 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Charger le modèle pré-entraîné\n",
+ "base_model = \"codellama/CodeLlama-7b-hf\"\n",
+ "model = AutoModelForCausalLM.from_pretrained(\n",
+ " base_model,\n",
+ " load_in_8bit=True,\n",
+ " torch_dtype=torch.float16,\n",
+ " device_map=\"auto\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3987ca4f-b25d-4d9d-9dc2-79c4beeb7917",
+ "metadata": {},
+ "source": [
+ "## Load the Tokenizer"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e1f5cdc6-48f6-4765-8c7c-76ba6a47e5e4",
+ "metadata": {},
+ "source": [
+ "Load the tokenizer corresponding to the pre-trained model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "d7fb73a7-d643-4c31-866e-d34baf0dbff5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Charger le tokenizer\n",
+ "tokenizer = AutoTokenizer.from_pretrained(base_model)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "798b475a-8667-4d52-b028-ac04ec129746",
+ "metadata": {},
+ "source": [
+ "## Load the Fine-Tuned Model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "67fa550a-8c96-4549-a496-de5d6c3215e6",
+ "metadata": {},
+ "source": [
+ "Load the fine-tuned model from the checkpoint directory."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "17bc03cf-634d-460d-806f-ce1ff8bf6d18",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "checkpoint_dir = \"models/code-llama-finetuned-level1\"\n",
+ "model = PeftModel.from_pretrained(model, checkpoint_dir)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1aa98af7-89d9-4a16-b266-18fae4cd12b9",
+ "metadata": {},
+ "source": [
+ "## Examples"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "b6d2155d-0d2c-4c6b-a438-d4e976e5f49a",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "a = 9\n",
+ "b = 7 \n",
+ "c = a * b\n",
+ "print(c)\n",
+ "# output\n",
+ "# 63\n"
+ ]
+ }
+ ],
+ "source": [
+ "eval_prompt = \"\"\"\n",
+ "a = 9\n",
+ "b = 7 \n",
+ "c = a * b\n",
+ "print(c)\n",
+ "# output\n",
+ "\"\"\"\n",
+ "\n",
+ "# Tokeniser l'invite d'évaluation\n",
+ "model_input = tokenizer(eval_prompt, return_tensors=\"pt\").to(\"cuda\")\n",
+ "\n",
+ "# Évaluer le modèle\n",
+ "model.eval()\n",
+ "with torch.no_grad():\n",
+ " output = tokenizer.decode(model.generate(**model_input, max_new_tokens=30, pad_token_id=tokenizer.eos_token_id)[0], skip_special_tokens=True)\n",
+ "\n",
+ "# Afficher le résultat\n",
+ "print(output)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "edabaa2f-3170-48fb-b45e-43c1fd7d5cdb",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "e = 10\n",
+ "c = 10\n",
+ "a = e / c\n",
+ "print(a)\n",
+ "# output\n",
+ "# 1.0\n"
+ ]
+ }
+ ],
+ "source": [
+ "eval_prompt = \"\"\"\n",
+ "e = 10\n",
+ "c = 10\n",
+ "a = e / c\n",
+ "print(a)\n",
+ "# output\n",
+ "\"\"\"\n",
+ "\n",
+ "# Tokeniser l'invite d'évaluation\n",
+ "model_input = tokenizer(eval_prompt, return_tensors=\"pt\").to(\"cuda\")\n",
+ "\n",
+ "# Évaluer le modèle\n",
+ "model.eval()\n",
+ "with torch.no_grad():\n",
+ " output = tokenizer.decode(model.generate(**model_input, max_new_tokens=30, pad_token_id=tokenizer.eos_token_id)[0], skip_special_tokens=True)\n",
+ "\n",
+ "# Afficher le résultat\n",
+ "print(output)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "e4e818ed-27c1-4bd0-b1fe-35610851cc31",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "a = 9\n",
+ "d = 3\n",
+ "d = 1\n",
+ "d = 5 + 8\n",
+ "print(d * 5)\n",
+ "# output\n",
+ "# 45\n"
+ ]
+ }
+ ],
+ "source": [
+ "eval_prompt = \"\"\"\n",
+ "a = 9\n",
+ "d = 3\n",
+ "d = 1\n",
+ "d = 5 + 8\n",
+ "print(d * 5)\n",
+ "# output\n",
+ "\"\"\"\n",
+ "\n",
+ "# Tokeniser l'invite d'évaluation\n",
+ "model_input = tokenizer(eval_prompt, return_tensors=\"pt\").to(\"cuda\")\n",
+ "\n",
+ "# Évaluer le modèle\n",
+ "model.eval()\n",
+ "with torch.no_grad():\n",
+ " output = tokenizer.decode(model.generate(**model_input, max_new_tokens=30, pad_token_id=tokenizer.eos_token_id)[0], skip_special_tokens=True)\n",
+ "\n",
+ "# Afficher le résultat\n",
+ "print(output)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "TinyLM",
+ "language": "python",
+ "name": "tinylm"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
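All of the notebook's prompts follow one convention: the snippet, a `# output` marker, and then the completion, where the expected completion is simply whatever the snippet prints. Because the snippets are plain Python, the ground truth can be recovered by executing them, as in this sketch:

```python
import contextlib
import io

snippet = "a = 9\nb = 7\nc = a * b\nprint(c)"

buf = io.StringIO()
with contextlib.redirect_stdout(buf):
    exec(snippet, {})  # run the snippet to recover its true output

prompt = snippet + "\n# output\n"
print(prompt + "# " + buf.getvalue().strip())  # a correct completion ends with "# 63"
```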
diff --git a/generalization/evaluate.py b/generalization/evaluate.py
new file mode 100644
index 0000000..6e8cb19
--- /dev/null
+++ b/generalization/evaluate.py
@@ -0,0 +1,111 @@
+import os
+import re
+import torch
+import warnings
+from tqdm import tqdm
+import argparse
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel
+import pandas as pd
+
+import transformers
+
+transformers.logging.set_verbosity_error()
+
+# Ignore all warnings
+warnings.filterwarnings("ignore")
+
+def evaluate_model(base_model="codellama/CodeLlama-7b-hf",
+ checkpoint_dir="models/code-llama-finetuned-level1",
+ test_file="../data/test.txt",
+ output_file='results/result_llama.txt',
+ csv_file='results/results_llama.csv',
+ max_new_tokens=30):
+
+ print("Evaluating model...")
+ print()
+
+ # Load the pretrained model
+ print("Loading the pretrained model...")
+ print()
+ model = AutoModelForCausalLM.from_pretrained(
+ base_model,
+ load_in_8bit=True,
+ torch_dtype=torch.float16,
+ device_map="auto",
+ )
+
+ # Load the tokenizer
+ print("Loading the tokenizer...")
+ print()
+ tokenizer = AutoTokenizer.from_pretrained(base_model)
+
+ # Load the fine-tuned model
+ print("Loading the fine-tuned model...")
+ print()
+ model = PeftModel.from_pretrained(model, checkpoint_dir)
+
+ # Load and preprocess the test data
+ print("Loading the test data...")
+ print()
+ with open(test_file, 'r', encoding='utf-8') as f:
+ text = f.read()
+
+ examples = [example for example in text.split("\n\n") if example]
+
+ data = []
+
+ print("Generating predictions...")
+ print()
+ for example in tqdm(examples):
+ splited_example = example.split("# output\n")
+ prompt_text = splited_example[0] + "# output\n"
+ real_response = splited_example[1]
+
+        # floats before ints, with an optional sign, so values like -3.5 are not truncated to -3
+        real_number_response = re.search(r"-?\d+\.\d+|-?\d+", real_response.replace("\n", ""))
+ real_result = float(real_number_response.group()) if real_number_response else 0.0
+
+ model_input = tokenizer(prompt_text, return_tensors="pt").to("cuda")
+ response = tokenizer.decode(model.generate(**model_input, max_new_tokens=max_new_tokens, pad_token_id=tokenizer.eos_token_id)[0], skip_special_tokens=True)
+
+ splited_response = response.split("# output")
+        number_response = re.search(r"-?\d+\.\d+|-?\d+", splited_response[1].replace("\n", ""))
+ generated_result = float(number_response.group()) if number_response else 0.0
+
+ data.append({'Prompt': prompt_text, 'Real_Results': real_result, 'Generated_Results': generated_result})
+
+ # Calculate accuracy
+ accuracy = sum(1 for d in data if d['Real_Results'] == d['Generated_Results']) / len(data)
+ print(f"Accuracy: {accuracy * 100:.2f}%")
+
+ # Store accuracy in a file
+ print("Storing accuracy in a file...")
+ print()
+ with open(output_file, 'w') as f:
+ f.write(f"Accuracy: {accuracy * 100:.2f}%\n")
+
+ # Store results in a CSV file using pandas
+ print("Storing results in a CSV file...")
+ print()
+ df = pd.DataFrame(data)
+ df.to_csv(csv_file, index=False)
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Evaluate a model with specified parameters.")
+ parser.add_argument('--base_model', type=str, default="codellama/CodeLlama-7b-hf", help="Base model name or path")
+ parser.add_argument('--checkpoint_dir', type=str, default="models/code-llama-finetuned-level1", help="Directory containing the model checkpoint")
+ parser.add_argument('--test_file', type=str, default="../data/test.txt", help="Path to the test file")
+ parser.add_argument('--output_file', type=str, default='results/result_llama.txt', help="Path to the output file where the accuracy will be stored")
+ parser.add_argument('--csv_file', type=str, default='results/results_llama.csv', help="Path to the CSV file where the results will be stored")
+ parser.add_argument('--max_new_tokens', type=int, default=30, help="Maximum number of new tokens to generate")
+
+ args = parser.parse_args()
+
+ evaluate_model(
+ base_model=args.base_model,
+ checkpoint_dir=args.checkpoint_dir,
+ test_file=args.test_file,
+ output_file=args.output_file,
+ csv_file=args.csv_file,
+ max_new_tokens=args.max_new_tokens
+ )
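The number-extraction regex fixed above deserves a note: regex alternation is first-match, so placing the integer pattern before the negative-float pattern silently truncates values like `-3.5`, turning a correct prediction into a false mismatch. A minimal illustration:

```python
import re

buggy = r"\d+\.\d+|\d+|-\d+|-\d+\.\d+"  # original ordering
fixed = r"-?\d+\.\d+|-?\d+"             # floats first, sign optional

completion = "# output\n# -3.5"
print(re.search(buggy, completion).group())  # '-3'   (the fractional part is lost)
print(re.search(fixed, completion).group())  # '-3.5'
```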
diff --git a/generalization/finetune.py b/generalization/finetune.py
new file mode 100644
index 0000000..4b9d764
--- /dev/null
+++ b/generalization/finetune.py
@@ -0,0 +1,194 @@
+import os
+import sys
+from datetime import datetime
+import argparse
+import warnings
+
+import torch
+from peft import (
+ LoraConfig,
+ get_peft_model,
+ get_peft_model_state_dict,
+ prepare_model_for_int8_training,
+ set_peft_model_state_dict,
+)
+from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForSeq2Seq
+from datasets import load_from_disk
+
+# Ignore all warnings
+warnings.filterwarnings("ignore")
+
+import transformers
+
+transformers.logging.set_verbosity_error()
+
+def train_model(base_model="codellama/CodeLlama-7b-hf",
+ train_dataset_path="data/tokenized_train",
+ val_dataset_path="data/tokenized_val",
+ resume_from_checkpoint="",
+ wandb_project="tiny-coder",
+ batch_size=128,
+ per_device_train_batch_size=32,
+ gradient_accumulation_steps=4,
+ output_dir="models/code-llama-finetuned-level1",
+ learning_rate=3e-4,
+ warmup_steps=100,
+ max_steps=200,
+ logging_steps=10,
+ eval_steps=20,
+ save_steps=20):
+
+ print("Fine-tuning model...")
+ print()
+
+ # Load the pretrained model
+ print("Loading the pretrained model...")
+ print()
+ model = AutoModelForCausalLM.from_pretrained(
+ base_model,
+ load_in_8bit=True,
+ torch_dtype=torch.float16,
+ device_map="auto",
+ )
+
+
+ # Load the tokenizer
+ print("Loading the tokenizer...")
+ print()
+ tokenizer = AutoTokenizer.from_pretrained(base_model)
+ tokenizer.add_eos_token = True
+ tokenizer.pad_token_id = 0
+ tokenizer.padding_side = "left"
+
+ # Load the tokenized datasets
+ print("Loading the tokenized datasets...")
+ print()
+ tokenized_train_dataset = load_from_disk(train_dataset_path)
+ tokenized_val_dataset = load_from_disk(val_dataset_path)
+
+ # Prepare the model for int8 training
+ model.train()
+ model = prepare_model_for_int8_training(model)
+
+    # Configure LoRA settings
+ config = LoraConfig(
+ r=16,
+ lora_alpha=16,
+ target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
+ lora_dropout=0.05,
+ bias="none",
+ task_type="CAUSAL_LM",
+ )
+ model = get_peft_model(model, config)
+
+ # Resume from checkpoint if specified
+ if resume_from_checkpoint and os.path.exists(resume_from_checkpoint):
+ print(f"Restarting from {resume_from_checkpoint}")
+ adapters_weights = torch.load(resume_from_checkpoint)
+ set_peft_model_state_dict(model, adapters_weights)
+ elif resume_from_checkpoint:
+ print(f"Checkpoint {resume_from_checkpoint} not found")
+
+ # Setup Weights and Biases if project name is given
+ if wandb_project:
+ print("Setting up Weights and Biases...")
+ print()
+ os.environ["WANDB_PROJECT"] = wandb_project
+ os.environ['WANDB__EXECUTABLE'] = sys.executable
+
+ # Enable parallelism if multiple GPUs are available
+ if torch.cuda.device_count() > 1:
+ print("Enabling parallelism...")
+ print()
+ model.is_parallelizable = True
+ model.model_parallel = True
+
+ # Training arguments
+ print("Setting up training arguments...")
+ print()
+ training_args = TrainingArguments(
+ per_device_train_batch_size=per_device_train_batch_size,
+ gradient_accumulation_steps=gradient_accumulation_steps,
+ warmup_steps=warmup_steps,
+ max_steps=max_steps,
+ learning_rate=learning_rate,
+ fp16=True,
+ logging_steps=logging_steps,
+ optim="adamw_torch",
+ evaluation_strategy="steps",
+ save_strategy="steps",
+ eval_steps=eval_steps,
+ save_steps=save_steps,
+ output_dir=output_dir,
+ group_by_length=True,
+ report_to="wandb",
+ run_name=f"codellama-{datetime.now().strftime('%Y-%m-%d-%H-%M')}",
+ )
+
+ # Initialize the Trainer
+ trainer = Trainer(
+ model=model,
+ train_dataset=tokenized_train_dataset,
+ eval_dataset=tokenized_val_dataset,
+ args=training_args,
+ data_collator=DataCollatorForSeq2Seq(
+ tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
+ ),
+ )
+
+ # Disable caching for training
+ model.config.use_cache = False
+
+ # Patch the model's state_dict
+ old_state_dict = model.state_dict
+ model.state_dict = (lambda self, *_, **__: get_peft_model_state_dict(self, old_state_dict())).__get__(
+ model, type(model)
+ )
+
+ # Compile the model if applicable
+ if torch.__version__ >= "2" and sys.platform != "win32":
+ print("Compiling the model...")
+ print()
+ model = torch.compile(model)
+
+ # Start training
+ print("Starting training...")
+ trainer.train()
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Train a model with specified parameters.")
+ parser.add_argument('--base_model', type=str, default="codellama/CodeLlama-7b-hf", help="Base model name or path")
+ parser.add_argument('--train_dataset_path', type=str, default="data/tokenized_train", help="Path to the tokenized training dataset")
+ parser.add_argument('--val_dataset_path', type=str, default="data/tokenized_val", help="Path to the tokenized validation dataset")
+ parser.add_argument('--resume_from_checkpoint', type=str, default="", help="Path to checkpoint to resume training from")
+ parser.add_argument('--wandb_project', type=str, default="tiny-coder", help="WandB project name")
+ parser.add_argument('--batch_size', type=int, default=128, help="Total batch size for training")
+ parser.add_argument('--per_device_train_batch_size', type=int, default=32, help="Batch size per device for training")
+ parser.add_argument('--gradient_accumulation_steps', type=int, default=4, help="Number of gradient accumulation steps")
+ parser.add_argument('--output_dir', type=str, default="models/code-llama-finetuned-level1", help="Directory to save the output")
+ parser.add_argument('--learning_rate', type=float, default=3e-4, help="Learning rate")
+ parser.add_argument('--warmup_steps', type=int, default=100, help="Number of warmup steps")
+ parser.add_argument('--max_steps', type=int, default=200, help="Maximum number of training steps")
+ parser.add_argument('--logging_steps', type=int, default=10, help="Number of steps between logging")
+ parser.add_argument('--eval_steps', type=int, default=20, help="Number of steps between evaluations")
+ parser.add_argument('--save_steps', type=int, default=20, help="Number of steps between saving checkpoints")
+
+ args = parser.parse_args()
+
+ train_model(
+ base_model=args.base_model,
+ train_dataset_path=args.train_dataset_path,
+ val_dataset_path=args.val_dataset_path,
+ resume_from_checkpoint=args.resume_from_checkpoint,
+ wandb_project=args.wandb_project,
+ batch_size=args.batch_size,
+ per_device_train_batch_size=args.per_device_train_batch_size,
+ gradient_accumulation_steps=args.gradient_accumulation_steps,
+ output_dir=args.output_dir,
+ learning_rate=args.learning_rate,
+ warmup_steps=args.warmup_steps,
+ max_steps=args.max_steps,
+ logging_steps=args.logging_steps,
+ eval_steps=args.eval_steps,
+ save_steps=args.save_steps
+ )
\ No newline at end of file
diff --git a/generalization/results/result_llama.txt b/generalization/results/result_llama.txt
new file mode 100644
index 0000000..2fe40e2
--- /dev/null
+++ b/generalization/results/result_llama.txt
@@ -0,0 +1 @@
+Accuracy: 96.00%
diff --git a/generalization/results/results_llama.csv b/generalization/results/results_llama.csv
new file mode 100644
index 0000000..5d3fb53
--- /dev/null
+++ b/generalization/results/results_llama.csv
@@ -0,0 +1,510 @@
+Prompt,Real_Results,Generated_Results
+"c = 3
+e = 9
+e = 0 * c
+print(e)
+# output
+",0.0,0.0
+"e = 2
+a = 6
+c = a / 4
+print(c)
+# output
+",1.5,1.5
+"e = 6
+c = 3 / e
+print(c)
+# output
+",0.5,0.5
+"e = 8
+a = 3
+print(e / a)
+# output
+",2.6666666666666665,2.6666666666666665
+"a = 8
+e = a + 1
+print(a / a)
+# output
+",1.0,1.0
+"e = 6
+a = 7
+d = a - 1
+print(d)
+# output
+",6.0,6.0
+"d = 9
+print(d * d)
+# output
+",81.0,81.0
+"c = 4
+print(c + 3)
+# output
+",7.0,7.0
+"d = 7
+print(d / d)
+# output
+",1.0,1.0
+"d = 5
+a = 0
+print(a + d)
+# output
+",5.0,5.0
+"a = 6
+b = 0
+a = 3 - 3
+print(b / 9)
+# output
+",0.0,0.0
+"a = 0
+a = 0
+print(a)
+# output
+",0.0,0.0
+"c = 3
+e = 7
+print(c * c)
+# output
+",9.0,9.0
+"a = 2
+e = 0 / 8
+print(a * a)
+# output
+",4.0,4.0
+"e = 9
+a = 9
+print(a * e)
+# output
+",81.0,81.0
+"d = 8
+e = 4
+print(e + d)
+# output
+",12.0,12.0
+"a = 2
+c = a - 6
+print(a / a)
+# output
+",1.0,1.0
+"a = 6
+b = 7
+d = a - 0
+print(d)
+# output
+",6.0,6.0
+"b = 7
+e = 2
+b = 0 * 7
+print(b)
+# output
+",0.0,0.0
+"c = 9
+e = 4
+print(c - 5)
+# output
+",4.0,4.0
+"d = 4
+d = 7 * 4
+print(d * d)
+# output
+",784.0,289.0
+"a = 4
+print(a / a)
+# output
+",1.0,1.0
+"e = 3
+b = 4
+a = e - 6
+print(a)
+# output
+",-3.0,-3.0
+"d = 5
+d = 9 - 7
+print(d)
+# output
+",2.0,2.0
+"b = 7
+b = 3 / b
+print(b)
+# output
+",0.42857142857142855,0.42857142857142855
+"a = 9
+b = a + a
+print(a / a)
+# output
+",1.0,1.0
+"a = 0
+d = a * 9
+print(d)
+# output
+",0.0,0.0
+"e = 1
+c = e + e
+print(c)
+# output
+",2.0,2.0
+"e = 3
+c = e + 2
+print(e + e)
+# output
+",6.0,6.0
+"a = 4
+e = 0 * 7
+print(a + 2)
+# output
+",6.0,6.0
+"b = 6
+b = 5
+b = b - b
+print(b / 3)
+# output
+",0.0,0.0
+"b = 0
+d = 9 * 1
+print(d)
+# output
+",9.0,9.0
+"a = 4
+a = 2
+print(a)
+# output
+",2.0,2.0
+"e = 9
+b = 3
+print(b)
+# output
+",3.0,3.0
+"d = 6
+c = 4 + 1
+print(d + 2)
+# output
+",8.0,8.0
+"d = 8
+c = d - 6
+print(c)
+# output
+",2.0,2.0
+"a = 9
+d = 1
+print(a * d)
+# output
+",9.0,9.0
+"c = 4
+a = 4
+print(a)
+# output
+",4.0,4.0
+"b = 5
+a = 1
+b = a - a
+print(b)
+# output
+",0.0,0.0
+"d = 2
+c = 1
+print(d)
+# output
+",2.0,2.0
+"e = 3
+b = 5
+print(b * 9)
+# output
+",45.0,45.0
+"e = 9
+d = 5
+d = e * d
+print(e / d)
+# output
+",0.2,0.09090909090909091
+"c = 5
+print(c * c)
+# output
+",25.0,25.0
+"e = 2
+c = 8 / e
+print(e * 9)
+# output
+",18.0,18.0
+"c = 0
+c = 6 - 8
+print(c)
+# output
+",-2.0,-2.0
+"e = 7
+a = 2
+d = 5 * 7
+print(a * 7)
+# output
+",14.0,14.0
+"e = 1
+print(e * e)
+# output
+",1.0,1.0
+"d = 6
+b = d * d
+print(b)
+# output
+",36.0,36.0
+"e = 8
+b = e / 5
+print(e / 2)
+# output
+",4.0,4.0
+"d = 6
+c = 3
+a = 9 + d
+print(a)
+# output
+",15.0,15.0
+"d = 4
+a = 8
+print(a * a)
+# output
+",64.0,64.0
+"e = 6
+print(e + e)
+# output
+",12.0,12.0
+"b = 3
+a = 3
+print(a)
+# output
+",3.0,3.0
+"b = 9
+b = 9
+print(b - 5)
+# output
+",4.0,4.0
+"a = 8
+print(a)
+# output
+",8.0,8.0
+"b = 2
+e = 2
+print(e)
+# output
+",2.0,2.0
+"e = 1
+c = e / 7
+print(e - e)
+# output
+",0.0,0.0
+"e = 3
+c = 0
+print(c)
+# output
+",0.0,0.0
+"a = 1
+a = 3
+print(a / 1)
+# output
+",3.0,3.0
+"c = 3
+print(c / c)
+# output
+",1.0,1.0
+"c = 0
+c = c - c
+print(c)
+# output
+",0.0,0.0
+"a = 7
+d = 2
+print(a)
+# output
+",7.0,7.0
+"c = 3
+a = 0
+print(c - 4)
+# output
+",-1.0,-1.0
+"e = 9
+a = 5
+print(e)
+# output
+",9.0,9.0
+"d = 3
+b = 4
+d = 4 / 3
+print(b * b)
+# output
+",16.0,16.0
+"c = 5
+d = 5
+d = c - 4
+print(d)
+# output
+",1.0,1.0
+"e = 6
+b = 3
+e = e - 6
+print(e)
+# output
+",0.0,0.0
+"d = 4
+c = 6
+print(d + c)
+# output
+",10.0,10.0
+"b = 8
+a = 9
+e = 5 / 2
+print(b * a)
+# output
+",72.0,72.0
+"e = 4
+print(e)
+# output
+",4.0,4.0
+"b = 2
+c = 3
+print(b / b)
+# output
+",1.0,1.0
+"d = 8
+d = 0
+c = 7 - 1
+print(c)
+# output
+",6.0,6.0
+"e = 9
+print(e * e)
+# output
+",81.0,81.0
+"e = 1
+a = 6
+print(a / 6)
+# output
+",1.0,1.0
+"d = 5
+d = 9
+e = 0 - d
+print(d * d)
+# output
+",81.0,81.0
+"d = 5
+d = d * d
+print(d)
+# output
+",25.0,25.0
+"b = 7
+a = 2
+print(a)
+# output
+",2.0,2.0
+"b = 3
+e = 1
+e = 9 + e
+print(e)
+# output
+",10.0,10.0
+"e = 9
+e = 7 * e
+print(e + 7)
+# output
+",70.0,103.0
+"e = 3
+c = 2
+d = 1 + 5
+print(c * c)
+# output
+",4.0,4.0
+"d = 9
+b = 5
+print(d)
+# output
+",9.0,9.0
+"d = 6
+e = 6
+print(e)
+# output
+",6.0,6.0
+"e = 6
+e = 7
+print(e)
+# output
+",7.0,7.0
+"b = 9
+a = 2
+b = a - 9
+print(a - 4)
+# output
+",-2.0,-2.0
+"e = 2
+c = 3
+print(c)
+# output
+",3.0,3.0
+"b = 4
+b = 3 - b
+print(b)
+# output
+",-1.0,-1.0
+"d = 0
+d = 7
+print(d + 3)
+# output
+",10.0,10.0
+"d = 9
+print(d - 9)
+# output
+",0.0,0.0
+"c = 4
+print(c)
+# output
+",4.0,4.0
+"c = 3
+e = 2
+print(c)
+# output
+",3.0,3.0
+"e = 6
+c = 4
+print(e / 9)
+# output
+",0.6666666666666666,0.6666666666666666
+"a = 4
+print(a)
+# output
+",4.0,4.0
+"c = 8
+c = 0
+b = 9 * c
+print(b)
+# output
+",0.0,0.0
+"a = 8
+a = 9
+print(a)
+# output
+",9.0,9.0
+"a = 0
+e = a + a
+print(a + a)
+# output
+",0.0,0.0
+"a = 7
+e = 6
+print(a + a)
+# output
+",14.0,14.0
+"b = 8
+e = b + b
+print(e)
+# output
+",16.0,16.0
+"a = 3
+e = 9
+b = e - a
+print(b)
+# output
+",6.0,6.0
+"e = 4
+e = 4 * e
+print(e / 8)
+# output
+",2.0,1.0
+"d = 7
+c = 2 + 8
+print(d * 1)
+# output
+",7.0,7.0
diff --git a/generalization/tokenizing.py b/generalization/tokenizing.py
new file mode 100644
index 0000000..14a446e
--- /dev/null
+++ b/generalization/tokenizing.py
@@ -0,0 +1,85 @@
+import argparse
+import pandas as pd
+from datasets import Dataset
+from transformers import AutoTokenizer
+
+import transformers
+
+transformers.logging.set_verbosity_error()
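+
+# Example usage (defaults shown):
+#   python tokenizing.py --train_file ../data/train.txt --test_file ../data/val.txt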
+
+def tokenize_data(train_file="../data/train.txt",
+ test_file="../data/val.txt",
+ tokenizer_name="codellama/CodeLlama-7b-hf",
+ train_output_dir="data/tokenized_train",
+ val_output_dir="data/tokenized_val"):
+ print("Tokenizing data...")
+
+ # Read the training and test data
+ with open(train_file) as f:
+ train_data = f.read()
+
+ with open(test_file) as f:
+ test_data = f.read()
+
+ # Split the snippets into individual examples
+ train_snippets = train_data.split('\n\n')
+ test_snippets = test_data.split('\n\n')
+
+ # Create datasets from the snippets
+ train_dataset = Dataset.from_pandas(pd.DataFrame({'snippets': train_snippets}))
+ eval_dataset = Dataset.from_pandas(pd.DataFrame({'snippets': test_snippets}))
+
+ # Load the tokenizer
+ print("Loading tokenizer...")
+ tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
+ tokenizer.add_eos_token = True
+ tokenizer.pad_token_id = 0
+ tokenizer.padding_side = "left"
+
+ # Function to tokenize a prompt
+ def tokenize(prompt):
+ result = tokenizer(
+ prompt,
+ truncation=True,
+ max_length=512,
+ padding=False,
+ return_tensors=None,
+ )
+
+ # For self-supervised learning, labels are also the inputs
+ result["labels"] = result["input_ids"].copy()
+
+ return result
+
+ # Function to generate and tokenize a prompt
+ def generate_and_tokenize_prompt(data_point):
+ full_prompt = data_point["snippets"]
+
+ return tokenize(full_prompt)
+
+ # Tokenize the training and validation datasets
+ print("Tokenizing datasets...")
+ tokenized_train_dataset = train_dataset.map(generate_and_tokenize_prompt)
+ tokenized_val_dataset = eval_dataset.map(generate_and_tokenize_prompt)
+
+ # Save the tokenized datasets to disk
+ print(f"Saving tokenized datasets to {train_output_dir} and {val_output_dir}...")
+ tokenized_train_dataset.save_to_disk(train_output_dir)
+ tokenized_val_dataset.save_to_disk(val_output_dir)
+
+ print("Tokenization complete.")
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Tokenize data for language model training.")
+ parser.add_argument("--train_file", type=str, default="../data/train.txt", help="Path to the training file")
+ parser.add_argument("--test_file", type=str, default="../data/val.txt", help="Path to the test file")
+ parser.add_argument("--tokenizer_name", type=str, default="codellama/CodeLlama-7b-hf", help="Name or path of the tokenizer")
+ parser.add_argument("--train_output_dir", type=str, default="data/tokenized_train", help="Path to save the tokenized training dataset")
+ parser.add_argument("--val_output_dir", type=str, default="data/tokenized_val", help="Path to save the tokenized validation dataset")
+
+ args = parser.parse_args()
+
+ tokenize_data(args.train_file, args.test_file, args.tokenizer_name, args.train_output_dir, args.val_output_dir)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..122940a
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,10 @@
+anytree==2.12.1
+tqdm==4.66.2
+requests==2.31.0
+numpy==1.26.4
+pandas==2.2.1
+torch==2.2.2
+transformers==4.30.2
+datasets==2.18.0
+psutil==5.9.8
+peft @ git+https://github.com/huggingface/peft.git@e536616888d51b453ed354a6f1e243fecb02ea08
diff --git a/tasks/line _execution_counting/README.md b/tasks/line _execution_counting/README.md
new file mode 100644
index 0000000..d7145c0
--- /dev/null
+++ b/tasks/line _execution_counting/README.md
@@ -0,0 +1,47 @@
+# Line execution counting guide
+
+In this task, the model must predict how many lines of code will be executed when a random Python code snippet is run.
+
+
+This folder contains .py and .ipynb files covering the following steps:
+
+## Data generation
+
+[tinypy_generator.py](./tinypy_generator.py) is the main file that generates the dataset as labeled Python code snippets. It is a modified version of the original [tinypy_generator](https://github.com/MarwaNair/TinyPy-Generator), which generates Python code snippets and labels each one with the output of that code.
+
+For this task, we keep the code snippets but change the label: instead of labeling each snippet with its output, we label it with its executed-line count, hence the modification.
+
+The modification you will notice when exploring that [file](./tinypy_generator.py) is a new standalone method that, given a Python code snippet, returns its executed-line count.
+
+For experimentation, the same method is also provided in a separate demonstrative script, [lineCounter.py](./lineCounter.py), which accepts any working Python code as input.
+
+A detailed explanation of how that method works is provided in the following [Docs](https://docs.google.com/document/d/1Fz0KGN1wb-6rVqU0BdrTBSaodM-pksPXhfoAGQbU7Dk/edit?usp=sharing) file.
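+
+As a quick illustration, here is a minimal sketch of calling the counting method (assuming `line_counter` from [lineCounter.py](./lineCounter.py) is in scope):
+
+```python
+snippet = """a = 1
+for i in range(2):
+    print(a + i)"""
+
+# prints the number of line events traced while the snippet runs
+print(line_counter(snippet))
+```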
+
+## Data split
+
+Before moving on to finetuning, [prepare.py](./prepare.py) formats the data generated in the previous step: starting from a .txt file, it splits the code examples into training, validation, and test sets saved in three separate files, and it also generates a meta.pkl file that the next stage uses to retrieve the needed information about the generated data.
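+
+A typical invocation, after pointing `input_file_path` inside [prepare.py](./prepare.py) at your generated .txt file, is simply:
+
+```bash
+python prepare.py
+```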
+
+## Finetuning the model
+
+[finetuning.ipynb](./finetuning.ipynb) contains the entire process (explained in its comments) that follows data generation:
+- Data preparation (tokenization, etc.)
+- Model structure definition (including the LoRA implementation)
+- Training (or finetuning when LoRA is activated)
+- Evaluation
\ No newline at end of file
diff --git a/tasks/line _execution_counting/finetuning.ipynb b/tasks/line _execution_counting/finetuning.ipynb
new file mode 100644
index 0000000..4d6b600
--- /dev/null
+++ b/tasks/line _execution_counting/finetuning.ipynb
@@ -0,0 +1 @@
+{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"_cell_guid":"aa556b17-8ea0-4788-aea8-8d6259526157","_uuid":"a14f6813-426a-4666-9280-7ed88ebdb85e","collapsed":false,"execution":{"iopub.execute_input":"2024-09-23T12:47:30.282920Z","iopub.status.busy":"2024-09-23T12:47:30.282569Z","iopub.status.idle":"2024-09-23T12:47:30.288207Z","shell.execute_reply":"2024-09-23T12:47:30.287329Z","shell.execute_reply.started":"2024-09-23T12:47:30.282894Z"},"jupyter":{"outputs_hidden":false},"trusted":true},"outputs":[],"source":["import random\n","import os\n","import pickle\n","import time\n","import datetime\n","import torch\n","import torch.nn as nn\n","import torch.nn.functional as F\n","from torch.optim.lr_scheduler import StepLR\n","import numpy as np\n","import pandas as pd\n","from tqdm import tqdm\n","import re"]},{"cell_type":"code","execution_count":null,"metadata":{"_cell_guid":"063623b6-121d-4c18-a660-93d2f1be3305","_uuid":"f10e66ef-f466-4cfc-8ddb-594df92adb45","collapsed":false,"execution":{"iopub.execute_input":"2024-09-23T12:47:31.741666Z","iopub.status.busy":"2024-09-23T12:47:31.741274Z","iopub.status.idle":"2024-09-23T12:47:31.747701Z","shell.execute_reply":"2024-09-23T12:47:31.746726Z","shell.execute_reply.started":"2024-09-23T12:47:31.741638Z"},"jupyter":{"outputs_hidden":false},"trusted":true},"outputs":[],"source":["# Set the random seed for reproducibility\n","seed = 42\n","torch.manual_seed(seed) \n","random.seed(seed)\n","np.random.seed(seed)\n","\n","# Set the device to GPU if available, otherwise CPU\n","device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n","print(f\"Device set to {device}.\")"]},{"cell_type":"markdown","metadata":{"_cell_guid":"4776333c-08cd-4127-bea7-d7ec8898df7b","_uuid":"f61836e4-3f71-432d-8c50-9de1ff2e05e0","trusted":true},"source":["# Data Preparation"]},{"cell_type":"code","execution_count":null,"metadata":{"_cell_guid":"fcc4b173-f5e5-4110-b14f-46a8fa6da9ae","_uuid":"0aa1c1b8-a945-4baa-8d46-3a08056a9004","collapsed":false,"execution":{"iopub.execute_input":"2024-09-23T12:47:33.450870Z","iopub.status.busy":"2024-09-23T12:47:33.450042Z","iopub.status.idle":"2024-09-23T12:47:33.455749Z","shell.execute_reply":"2024-09-23T12:47:33.454855Z","shell.execute_reply.started":"2024-09-23T12:47:33.450837Z"},"jupyter":{"outputs_hidden":false},"trusted":true},"outputs":[],"source":["# Helper functions to load and save data\n","def save_data(data, file_path):\n"," with open(file_path, 'w') as f:\n"," f.write(data)\n","\n","def load_data(file_path):\n"," with open(file_path, 'r') as f:\n"," return f.read()"]},{"cell_type":"code","execution_count":null,"metadata":{"_cell_guid":"9731ee3f-b4d1-4b6e-afb2-859c56bef6c6","_uuid":"3da5ca68-e0d7-4aed-b89f-5f2a4ab910d9","collapsed":false,"execution":{"iopub.execute_input":"2024-09-23T12:47:44.813132Z","iopub.status.busy":"2024-09-23T12:47:44.812785Z","iopub.status.idle":"2024-09-23T12:47:44.817401Z","shell.execute_reply":"2024-09-23T12:47:44.816456Z","shell.execute_reply.started":"2024-09-23T12:47:44.813103Z"},"jupyter":{"outputs_hidden":false},"trusted":true},"outputs":[],"source":["# Directory where the data is stored \"must contain 4 files : train.txt, val.txt, test.txt and a meta.pkl file\"\n","DATA_DIR = \"/yourDataDirectoryHere\"\n","# Directory where the model is stored\n","MODEL_DIR = 
\"/yourModelDirectoryHere\""]},{"cell_type":"code","execution_count":null,"metadata":{"_cell_guid":"ddae0037-0f42-425d-a2e9-4238f4c608f2","_uuid":"6d064118-585d-46a9-8f40-f9472fe879b4","collapsed":false,"execution":{"iopub.execute_input":"2024-09-23T12:47:46.392475Z","iopub.status.busy":"2024-09-23T12:47:46.391663Z","iopub.status.idle":"2024-09-23T12:47:46.403456Z","shell.execute_reply":"2024-09-23T12:47:46.402524Z","shell.execute_reply.started":"2024-09-23T12:47:46.392441Z"},"jupyter":{"outputs_hidden":false},"trusted":true},"outputs":[],"source":["# Attempt to derive vocab_size from the dataset\n","\n","meta_path = os.path.join(DATA_DIR, 'meta.pkl')\n","vocab_size = None\n","\n","if os.path.exists(meta_path):\n"," with open(meta_path, 'rb') as f:\n"," meta = pickle.load(f)\n"," vocab_size = meta['vocab_size']\n"," print(f\"found vocab_size = {vocab_size} (inside {meta_path})\")\n","else:\n"," print(\"Meta file not found. Please ensure the meta.pkl file is present in the data directory.\")\n","\n","# Encode and decode functions for character-level Tokenzation \n","def encode(s):\n"," return [meta['stoi'][c] for c in s]\n","\n","def decode(l):\n"," return ''.join([meta['itos'][i] for i in l])"]},{"cell_type":"code","execution_count":null,"metadata":{"_cell_guid":"ff53a3e0-09ab-4396-90d9-cef86df0605b","_uuid":"1b2892b5-a904-4550-a8d6-ae8f51f1841f","collapsed":false,"execution":{"iopub.execute_input":"2024-09-23T12:47:48.476650Z","iopub.status.busy":"2024-09-23T12:47:48.476261Z","iopub.status.idle":"2024-09-23T12:47:49.391422Z","shell.execute_reply":"2024-09-23T12:47:49.390496Z","shell.execute_reply.started":"2024-09-23T12:47:48.476620Z"},"jupyter":{"outputs_hidden":false},"trusted":true},"outputs":[],"source":["# Load data\n","train_data = load_data(os.path.join(DATA_DIR, 'train.txt'))\n","val_data = load_data(os.path.join(DATA_DIR, 'val.txt'))\n","test_data = load_data(os.path.join(DATA_DIR, 'test.txt'))\n","\n","# Encode data\n","train_ids = encode(train_data)\n","val_ids = encode(val_data)\n","test_ids = encode(test_data)\n","\n","# Save encoded data to bin files, make sure to choose \"Files only\" on the persistence option of the session so that you don't encode data each time\n","train_ids = np.array(train_ids, dtype=np.uint16)\n","val_ids = np.array(val_ids, dtype=np.uint16)\n","test_ids = np.array(test_ids, dtype=np.uint16)\n","\n","train_ids.tofile( 'train.bin')\n","val_ids.tofile( 'val.bin')\n","test_ids.tofile('test.bin')\n","\n","print(\"Encoded data saved as binary 
files.\")"]},{"cell_type":"code","execution_count":null,"metadata":{"_cell_guid":"125ce42e-8df9-4094-b0c5-242fcd99a597","_uuid":"6a2d1ac2-5ef7-441c-9837-050c59120ab9","collapsed":false,"execution":{"iopub.execute_input":"2024-09-23T12:47:51.225679Z","iopub.status.busy":"2024-09-23T12:47:51.225322Z","iopub.status.idle":"2024-09-23T12:47:51.230098Z","shell.execute_reply":"2024-09-23T12:47:51.229117Z","shell.execute_reply.started":"2024-09-23T12:47:51.225651Z"},"jupyter":{"outputs_hidden":false},"trusted":true},"outputs":[],"source":["del(train_ids)\n","del(val_ids)\n","del(test_ids)"]},{"cell_type":"code","execution_count":null,"metadata":{"_cell_guid":"c53f3930-8d16-443d-a5ec-a6926f3f6cf4","_uuid":"9cd8ff5a-2170-4c53-be17-02ac7d0cffd9","collapsed":false,"execution":{"iopub.execute_input":"2024-09-23T12:47:52.915803Z","iopub.status.busy":"2024-09-23T12:47:52.915072Z","iopub.status.idle":"2024-09-23T12:47:52.920735Z","shell.execute_reply":"2024-09-23T12:47:52.919741Z","shell.execute_reply.started":"2024-09-23T12:47:52.915770Z"},"jupyter":{"outputs_hidden":false},"trusted":true},"outputs":[],"source":["# Load encoded data\n","train_data = np.memmap(\"/kaggle/working/train.bin\", dtype=np.uint16, mode='r')\n","val_data = np.memmap(\"/kaggle/working/val.bin\", dtype=np.uint16, mode='r')"]},{"cell_type":"markdown","metadata":{"_cell_guid":"8574d987-cef6-47d1-b889-e8242a0bcd23","_uuid":"f4fc1523-1d72-49db-a3bc-8d521f236993","trusted":true},"source":["# Model"]},{"cell_type":"code","execution_count":null,"metadata":{"_cell_guid":"2d4305c5-c1c6-48b0-a048-953a98954854","_uuid":"1fd63d8c-f842-444c-9dc8-cab3263ae6e4","collapsed":false,"execution":{"iopub.execute_input":"2024-09-23T12:47:54.647417Z","iopub.status.busy":"2024-09-23T12:47:54.647052Z","iopub.status.idle":"2024-09-23T12:47:54.653828Z","shell.execute_reply":"2024-09-23T12:47:54.652930Z","shell.execute_reply.started":"2024-09-23T12:47:54.647386Z"},"jupyter":{"outputs_hidden":false},"trusted":true},"outputs":[],"source":["# Hyperparameters for the GPT model\n","block_size = 256 # Maximum context length\n","n_embd = 372 # Embedding dimension\n","n_head = 6 # Number of attention heads\n","n_layer = 6 # Number of transformer blocks\n","dropout = 0 # Dropout rate\n","batch_size = 64 # Batch size for training\n","max_iters = 100_000 # Maximum number of iterations\n","learning_rate = 1e-3 # Initial Learning rate value\n","miles = [int(max_iters * m) for m in [0.7, 0.8, 0.9]] # Milestones for learning rate decay as fractions of max_iters\n","eval_interval = 10_000 # Evaluation interval\n","eval_iters = 1000 # Number of iterations for evaluation\n","vocab_size = 53 # Vocabulary size\n","\n","# Model to be fine-tuned \"set the model name without .pth\" (Keep it empty for training from scratch)\n","model_name = 'yourModelNameWithoutExtensionHere'\n","\n","# LoRA Rank - Set it to 0 if you want to train from scratch or perform full fine-tuning\n","lora_r = 12\n","\n","compile = False"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-09-23T12:47:57.166947Z","iopub.status.busy":"2024-09-23T12:47:57.166102Z","iopub.status.idle":"2024-09-23T12:47:57.171883Z","shell.execute_reply":"2024-09-23T12:47:57.170912Z","shell.execute_reply.started":"2024-09-23T12:47:57.166910Z"},"trusted":true},"outputs":[],"source":["print(f\"Data in tokens: {len(train_data)}\")\n","iters4epoch = len(train_data)//(batch_size * block_size)\n","print(f\"Number of iters for one pseudo-epoch : {iters4epoch}\")\n","print(f\"Number of 
pseudo-epochs : {max_iters / iters4epoch:.2f}\")"]},{"cell_type":"code","execution_count":null,"metadata":{"_cell_guid":"17ff6e02-86d2-4f49-a384-be8c035377a7","_uuid":"9c3a2af2-99a7-4657-bb8d-168a3e8dfcfb","collapsed":false,"execution":{"iopub.execute_input":"2024-09-23T12:47:59.282904Z","iopub.status.busy":"2024-09-23T12:47:59.282430Z","iopub.status.idle":"2024-09-23T12:47:59.430364Z","shell.execute_reply":"2024-09-23T12:47:59.429483Z","shell.execute_reply.started":"2024-09-23T12:47:59.282864Z"},"jupyter":{"outputs_hidden":false},"trusted":true},"outputs":[],"source":["# defining the entire structure of the model, and in parallel implementing lora\n","class LayerNorm(nn.Module):\n"," \"\"\" LayerNorm with an optional bias. PyTorch's LayerNorm doesn't support simply bias=False \"\"\"\n","\n"," def __init__(self, ndim, bias):\n"," super().__init__()\n"," self.weight = nn.Parameter(torch.ones(ndim))\n"," self.bias = nn.Parameter(torch.zeros(ndim)) if bias else None\n","\n"," def forward(self, input):\n"," return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)\n","\n","class Head(nn.Module):\n"," \"\"\"One head of self-attention.\"\"\"\n","\n"," def __init__(self, head_size):\n"," super().__init__()\n"," self.key = nn.Linear(n_embd, head_size, bias=False)\n"," self.query = nn.Linear(n_embd, head_size, bias=False)\n"," self.value = nn.Linear(n_embd, head_size, bias=False)\n"," self.flash = hasattr(torch.nn.functional, 'scaled_dot_product_attention')\n"," self.dropout = nn.Dropout(dropout)\n","\n"," def forward(self, x):\n"," B, T, C = x.shape\n"," k = self.key(x) # (B, T, head_size)\n"," q = self.query(x) # (B, T, head_size)\n"," v = self.value(x) # (B, T, head_size)\n","\n"," # Apply scaled dot-product attention\n"," out = torch.nn.functional.scaled_dot_product_attention(\n"," q, k, v, attn_mask=None, dropout_p=dropout if self.training else 0, is_causal=True\n"," )\n"," \n"," return out\n"," \n","\n","class MultiHeadAttention(nn.Module):\n"," \"\"\"Multiple heads of self-attention in parallel.\"\"\"\n","\n"," def __init__(self, num_heads, head_size):\n"," super().__init__()\n"," self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])\n"," self.proj = nn.Linear(n_embd, n_embd)\n"," self.dropout = nn.Dropout(dropout)\n"," \n"," def forward(self, x):\n"," # Concatenate the outputs from each head\n"," out = torch.cat([h(x) for h in self.heads], dim=-1)\n"," out = self.dropout(self.proj(out))\n"," return out\n"," \n","class FeedForward(nn.Module):\n"," \"\"\"A simple linear layer followed by a non-linearity.\"\"\"\n","\n"," def __init__(self, n_embd):\n"," super().__init__()\n"," self.net = nn.Sequential(\n"," nn.Linear(n_embd, 4 * n_embd, bias=False),\n"," nn.GELU(),\n"," nn.Linear(4 * n_embd, n_embd, bias=False),\n"," nn.Dropout(dropout),\n"," )\n","\n"," def forward(self, x):\n"," return self.net(x)\n","\n","class LinearLoRA(nn.Module):\n"," def __init__(self, original_layer, rank=8):\n"," super().__init__()\n"," self.original_layer = original_layer\n"," self.original_layer.weight.requires_grad = False\n"," self.rank = rank\n"," \n"," self.lora_a = nn.Parameter(torch.randn((original_layer.in_features, rank)))\n"," self.lora_b = nn.Parameter(torch.randn((rank, original_layer.out_features)))\n"," \n"," self.reset_parameters()\n"," \n"," def reset_parameters(self):\n"," nn.init.kaiming_uniform_(self.lora_a, a=np.sqrt(5))\n"," nn.init.zeros_(self.lora_b)\n"," \n"," def forward(self, x):\n"," lora_output = x @ self.lora_a @ self.lora_b\n"," return 
self.original_layer(x) + lora_output\n"," \n","class Block(nn.Module):\n"," \"\"\"Transformer block: communication followed by feedforward.\"\"\"\n","\n"," def __init__(self, n_embd, n_head):\n"," super().__init__()\n"," head_size = n_embd // n_head\n"," self.sa = MultiHeadAttention(n_head, head_size)\n"," self.ffwd = FeedForward(n_embd)\n"," self.ln1 = nn.LayerNorm(n_embd, bias=False)\n"," self.ln2 = nn.LayerNorm(n_embd, bias=False)\n","\n"," def forward(self, x):\n"," x = x + self.sa(self.ln1(x))\n"," x = x + self.ffwd(self.ln2(x))\n"," return x\n","\n","class GPT(nn.Module):\n"," \"\"\"GPT language model.\"\"\"\n","\n"," def __init__(self):\n"," super().__init__()\n"," self.token_embedding_table = nn.Embedding(vocab_size, n_embd)\n"," self.position_embedding_table = nn.Embedding(block_size, n_embd)\n"," self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])\n"," self.ln_f = nn.LayerNorm(n_embd, bias=False) \n"," self.lm_head = nn.Linear(n_embd, vocab_size)\n","\n"," def forward(self, idx, targets=None):\n"," B, T = idx.shape\n","\n"," # Token and position embeddings\n"," tok_emb = self.token_embedding_table(idx) # (B, T, n_embd)\n"," pos_emb = self.position_embedding_table(torch.arange(T, device=device)) # (T, n_embd)\n"," x = tok_emb + pos_emb # (B, T, n_embd)\n"," x = self.blocks(x) # (B, T, n_embd)\n"," x = self.ln_f(x) # (B, T, n_embd)\n"," logits = self.lm_head(x) # (B, T, vocab_size)\n","\n"," # Compute loss if targets are provided\n"," if targets is None:\n"," loss = None\n"," else:\n"," B, T, C = logits.shape\n"," logits = logits.view(B * T, C)\n"," targets = targets.view(B * T)\n"," loss = F.cross_entropy(logits, targets)\n","\n"," return logits, loss\n"," \n"," def generate(self, idx, max_new_tokens):\n"," \"\"\"Generate new tokens given an initial context `idx`.\"\"\"\n"," for _ in range(max_new_tokens):\n"," idx_cond = idx[:, -block_size:] # Crop to the last block_size tokens\n"," logits, _ = self(idx_cond)\n"," logits = logits[:, -1, :] # Focus on the last time step\n"," probs = F.softmax(logits, dim=-1) # Convert to probabilities\n"," idx_next = torch.multinomial(probs, num_samples=1) # Sample from the distribution\n"," idx = torch.cat((idx, idx_next), dim=1) # Append sampled index to the sequence\n"," return idx\n"," \n"," def activate_lora(self, r=8, heads_only=False, freeze_others=True):\n"," self.lora_rank = r\n"," self.replace_multihead_attention_recursion(heads_only)\n"," if freeze_others:\n"," self.freeze_parameters_except_lora_and_bias()\n"," \n"," def replace_multihead_attention_recursion(self, heads_only=False, model=None):\n"," children = self.named_children() if model is None else model.named_children()\n"," for name, module in children:\n"," if heads_only and name in {\"query\", \"key\", \"value\"}:\n"," # Replace with Lora SelfAttention\n"," new_layer = LinearLoRA(module, rank=self.lora_rank)\n","\n"," if model == None:\n"," self.__setattr__(name, new_layer)\n"," else:\n"," setattr(model, name, new_layer)\n"," \n"," elif isinstance(module, nn.Linear) and not heads_only:\n"," new_layer = LinearLoRA(module, rank=self.lora_rank)\n"," \n"," if model == None:\n"," self.__setattr__(name, new_layer)\n"," else:\n"," setattr(model, name, new_layer)\n"," \n"," else:\n"," # Recursive call for child modules\n"," self.replace_multihead_attention_recursion(heads_only, model=module)\n"," \n"," \n"," def freeze_parameters_except_lora_and_bias(self):\n"," for name, param in self.named_parameters():\n"," is_trainable = (\n"," \"lora_\" in 
name\n"," #(self.train_layer_norms and \"LayerNorm\" in name)\n"," )\n","\n"," param.requires_grad = is_trainable"]},{"cell_type":"code","execution_count":null,"metadata":{"_cell_guid":"a716f789-f605-42d0-9494-d8927ed09a6f","_uuid":"be441d8d-c18b-4694-b2ff-607aac4b11e6","collapsed":false,"execution":{"iopub.execute_input":"2024-09-23T12:48:03.987746Z","iopub.status.busy":"2024-09-23T12:48:03.987386Z","iopub.status.idle":"2024-09-23T12:48:03.998567Z","shell.execute_reply":"2024-09-23T12:48:03.997639Z","shell.execute_reply.started":"2024-09-23T12:48:03.987716Z"},"jupyter":{"outputs_hidden":false},"trusted":true},"outputs":[],"source":["# Get random batch of data\n","def get_batch(split):\n"," data = train_data if split == 'train' else val_data\n"," ix = torch.randint(len(data) - block_size, (batch_size,))\n"," x = torch.stack([torch.from_numpy((data[i:i+block_size]).astype(np.int64)) for i in ix])\n"," y = torch.stack([torch.from_numpy((data[i+1:i+1+block_size]).astype(np.int64)) for i in ix])\n"," x, y = x.to(device), y.to(device)\n"," return x, y\n","\n","# Estimate loss on train and val splits\n","@torch.no_grad()\n","def estimate_loss():\n"," out = {}\n"," model.eval()\n"," for split in ['train', 'val']:\n"," losses = torch.zeros(eval_iters) \n"," for k in range(eval_iters):\n"," X, Y = get_batch(split)\n"," logits, loss = model(X, Y)\n"," losses[k] = loss.item()\n"," out[split] = losses.mean()\n"," model.train()\n"," return out\n","\n","\n","# Helper function to make large numbers of parameters human-readable\n","def human_readable(num):\n"," magnitude = 0\n"," while abs(num) >= 1000:\n"," magnitude += 1\n"," num /= 1000.0\n"," return '%.0f%s' % (num, ['', 'K', 'M', 'G', 'T', 'P'][magnitude])"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-09-23T12:48:08.446054Z","iopub.status.busy":"2024-09-23T12:48:08.445693Z","iopub.status.idle":"2024-09-23T12:48:08.456231Z","shell.execute_reply":"2024-09-23T12:48:08.455320Z","shell.execute_reply.started":"2024-09-23T12:48:08.446025Z"},"trusted":true},"outputs":[],"source":["# load the language model\n","def load_model():\n"," \"\"\"\n"," Load pre-trained model based on the provided model name.\n"," \"\"\"\n"," model_path = os.path.join(MODEL_DIR, f\"{model_name}.pth\")\n"," if not os.path.exists(model_path):\n"," raise FileNotFoundError(f\"Model file '{model_path}' not found.\")\n"," \n"," model = GPT()\n"," print(\"Compiling the model...\\n\")\n"," r = -1\n"," if compile:\n"," try:\n"," model = torch.compile(model) # requires PyTorch 2.0\n"," except Exception as e:\n"," pass\n","\n"," checkpoint = torch.load(model_path, map_location=device)\n"," if 'lora_rank' in checkpoint.keys():\n"," r = checkpoint['lora_rank']\n"," state = checkpoint['state_dict']\n","\n"," if r > 0:\n"," model.activate_lora(r)\n"," model.load_state_dict(state)\n"," else:\n"," model.load_state_dict(checkpoint)\n"," else:\n"," checkpoint = torch.load(model_path, map_location=device)\n"," if 'lora_rank' in checkpoint.keys():\n"," r = checkpoint['lora_rank']\n"," state_dict = checkpoint['state_dict']\n","\n"," if r > 0:\n"," model.activate_lora(r)\n"," else:\n"," state_dict = checkpoint\n"," \n"," state_dict_keys = map(lambda x: x.replace(\"_orig_mod.\", \"\"), state_dict.keys())\n"," state_dict = dict(zip(state_dict_keys, state_dict.values()))\n"," model.load_state_dict(state_dict)\n","\n"," m = model.to(device)\n"," return m, (r > 
0)"]},{"cell_type":"code","execution_count":null,"metadata":{"_cell_guid":"21de39d0-d298-45ce-a590-c6be400f31e8","_uuid":"db1edcb0-7dae-40b8-99f0-3a524bd1311e","collapsed":false,"execution":{"iopub.execute_input":"2024-09-23T12:48:10.715656Z","iopub.status.busy":"2024-09-23T12:48:10.714845Z","iopub.status.idle":"2024-09-23T12:48:11.061542Z","shell.execute_reply":"2024-09-23T12:48:11.060652Z","shell.execute_reply.started":"2024-09-23T12:48:10.715624Z"},"jupyter":{"outputs_hidden":false},"trusted":true},"outputs":[],"source":["# Initialize model and move it to the device (GPU)\n","if len(model_name) > 0:\n"," print(\"Loading model...\\n\")\n"," model, r_exists = load_model()\n","\n","else:\n"," model = GPT()\n"," m = model.to(device)\n"," r_exists = False\n","\n"," # compile the model\n"," if compile:\n"," print(\"compiling the model... (takes a ~minute)\")\n"," model = torch.compile(model)\n","\n","if lora_r > 0 and not r_exists:\n"," print(\"Activating LoRA...\")\n"," model.activate_lora(lora_r)\n"," model = model.to(device)\n","\n","num_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)\n","num_parameters_hr = human_readable(num_parameters)\n","print(f'The model has {num_parameters_hr} trainable parameters')"]},{"cell_type":"markdown","metadata":{"_cell_guid":"ac1fe251-e0c8-4079-9da4-68aff59262f4","_uuid":"8cdf45cc-0d3a-43a9-b10d-5381799a21f2","trusted":true},"source":["# Training"]},{"cell_type":"code","execution_count":null,"metadata":{"_cell_guid":"e725706a-19a1-4e82-91b1-514dd0488f33","_uuid":"45093d41-9498-45e4-b93b-95b0b239c0af","collapsed":false,"execution":{"iopub.execute_input":"2024-09-11T16:44:05.970233Z","iopub.status.busy":"2024-09-11T16:44:05.969481Z","iopub.status.idle":"2024-09-11T16:44:07.752808Z","shell.execute_reply":"2024-09-11T16:44:07.751536Z","shell.execute_reply.started":"2024-09-11T16:44:05.970172Z"},"jupyter":{"outputs_hidden":false},"trusted":true},"outputs":[],"source":["# Initialize optimizer\n","optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)\n","\n","# Initialize learning rate scheduler\n","scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=miles, gamma=0.1)"]},{"cell_type":"code","execution_count":null,"metadata":{"_cell_guid":"76b8e469-893d-4151-a175-99b54dbabe60","_uuid":"534a6c6a-e6b8-4632-8078-86aab93500de","collapsed":false,"execution":{"iopub.execute_input":"2024-09-11T10:57:07.371504Z","iopub.status.busy":"2024-09-11T10:57:07.371046Z"},"jupyter":{"outputs_hidden":false},"trusted":true},"outputs":[],"source":["# Get current date and hour to get track of experiments\n","now = datetime.datetime.now()\n","date_hour = now.strftime(\"%Y-%m-%d_%H-%M\")\n","\n","# Train\n","# Start training timer\n","start_time = time.time()\n","\n","# Training loop\n","for iter in range(max_iters):\n","\n"," # evaluate the model on the train and val splits and log the losses\n"," if iter % eval_interval == 0:\n"," losses = estimate_loss()\n"," print(f'iter {iter:5d} | train loss {losses[\"train\"]:.4f} | val loss {losses[\"val\"]:.4f}')\n"," \n"," # train the model for one iteration\n"," xb, yb = get_batch('train')\n","\n"," # forward passd\n"," logits, loss = model(xb, yb)\n"," optimizer.zero_grad(set_to_none=True)\n"," #loss.requires_grad = True\n"," loss.backward()\n"," optimizer.step()\n","\n"," # Step the scheduler\n"," scheduler.step()\n","\n","# End training timer\n","end_time = time.time()\n","print(f'Training time: {(end_time - start_time) / 60} min')\n","\n","# Save the trained model\n","model_path = 
f\"{num_parameters_hr}_{date_hour}.pth\"\n","checkpoint = {\n"," 'lora_rank': model.lora_rank if(hasattr(model, \"lora_rank\")) else -1,\n"," 'state_dict': model.state_dict()\n","}\n","\n","torch.save(checkpoint, model_path)\n","print(f\"Model saved to {model_path}\\n\")"]},{"cell_type":"markdown","metadata":{"_cell_guid":"e831564c-6b76-489b-98b0-69cad098fdd6","_uuid":"facd8250-1fd4-4486-a9a6-f099df266caf","trusted":true},"source":["# Evaluation"]},{"cell_type":"code","execution_count":null,"metadata":{"_cell_guid":"d8071f1a-961b-4410-ae36-ba54b5b525d0","_uuid":"f4e10d4c-a4c8-4e6b-891e-f3d14947adfb","collapsed":false,"execution":{"iopub.execute_input":"2024-09-23T12:48:15.483123Z","iopub.status.busy":"2024-09-23T12:48:15.482531Z","iopub.status.idle":"2024-09-23T12:48:15.490084Z","shell.execute_reply":"2024-09-23T12:48:15.489192Z","shell.execute_reply.started":"2024-09-23T12:48:15.483092Z"},"jupyter":{"outputs_hidden":false},"trusted":true},"outputs":[],"source":["test_data = np.memmap('test.bin', dtype=np.uint16, mode='r')"]},{"cell_type":"code","execution_count":null,"metadata":{"_cell_guid":"f3d6ae4b-e069-43bd-be3f-9e46f19146d3","_uuid":"2e9f95ba-ca83-48bc-bb18-8910efc37422","collapsed":false,"execution":{"iopub.execute_input":"2024-09-23T12:48:20.950761Z","iopub.status.busy":"2024-09-23T12:48:20.950432Z","iopub.status.idle":"2024-09-23T12:48:20.961347Z","shell.execute_reply":"2024-09-23T12:48:20.960565Z","shell.execute_reply.started":"2024-09-23T12:48:20.950737Z"},"jupyter":{"outputs_hidden":false},"trusted":true},"outputs":[],"source":["# Evaluate example \"line execution counting\"\n","def evaluate_example(model, example, max_new_tokens=30):\n"," \n"," # Split example and determine maximum new tokens allowed\n"," splited_example = example.split(\"# count\")\n"," if not (\"for\" in splited_example[0]):\n"," max_new_tokens = 22\n"," # Encode prompt and prepare for evaluation \n"," encoded_example = torch.tensor(encode(splited_example[0] + \"# count\"), dtype=torch.long).unsqueeze(0).to(device)\n"," prompt_text = splited_example[0] + \"# count\"\n"," result_example = splited_example[-1]\n"," \n"," # Extract real results from example\n"," real_results = [float(match.group()) for match in re.finditer(r\"(?<=# )-?\\d+(\\.\\d+)?\", result_example.split('\\n\\n')[0].replace(\"\\n\", \"\"))]\n"," \n"," # Generate response from model and extract generated results\n"," try:\n"," response = decode(model.generate(encoded_example, max_new_tokens=max_new_tokens)[0].tolist())\n"," splited_response = response.split(\"# count\")\n"," result_response = splited_response[-1]\n"," generated_results = [float(match.group()) for match in re.finditer(r\"(?<=# )-?\\d+(\\.\\d+)?\", result_response.split('\\n\\n')[0].replace(\"\\n\", \"\"))]\n"," except:\n"," generated_results = \"error\"\n"," return prompt_text, real_results, generated_results\n","\n","\n","\n","# Write results to file\n","def write_results_to_file(output_file, prompt, real_results, generated_results):\n"," df = pd.DataFrame({\n"," 'Prompt': prompt,\n"," 'Real_Results': real_results,\n"," 'Generated_Results': generated_results\n"," })\n"," df.to_csv(output_file, 
index=False)"]},{"cell_type":"code","execution_count":null,"metadata":{"_cell_guid":"2536ece9-1d3c-4373-b308-fd1049f3297f","_uuid":"7b21f8fd-2e4c-443b-8120-e0af732bf558","collapsed":false,"execution":{"iopub.execute_input":"2024-09-23T12:48:31.214124Z","iopub.status.busy":"2024-09-23T12:48:31.213222Z","iopub.status.idle":"2024-09-23T13:32:13.381039Z","shell.execute_reply":"2024-09-23T13:32:13.380177Z","shell.execute_reply.started":"2024-09-23T12:48:31.214089Z"},"jupyter":{"outputs_hidden":false},"trusted":true},"outputs":[],"source":["# Evaluation Loop\n","\n","# Split examples and initialize lists for results\n","examples = decode(test_data).split(\"\\n\\n\")\n","examples = [example for example in examples if example]\n","# Taking a subset of the examples for short \"aimed for verification purposes\" evaluations\n","example_subset = examples[:5000]\n","# Start evaluation process\n","prompt = []\n","real_results = []\n","generated_results = []\n","\n","# Iterate through examples and evaluate the model on each one\n","for example in tqdm(example_subset):\n"," prompt_text, real_result, result = evaluate_example(model, example)\n"," prompt.append(prompt_text)\n"," real_results.append(real_result)\n"," generated_results.append(result)\n","\n","# Calculate and print accuracy\n","correct_count = sum(1 for real, generated in zip(real_results, generated_results) if real == generated)\n","accuracy = correct_count / len(generated_results)\n","print(f\"Accuracy: {accuracy * 100:.2f}%\")\n","\n","# Store accuracy in a file\n","with open(\"accuracy.txt\", 'w') as f:\n"," f.write(f\"Accuracy: {accuracy * 100:.2f}%\\n\")\n","\n","# Store predictions in a CSV file\n"," write_results_to_file(\"predictions.csv\", prompt, real_results, generated_results)"]}],"metadata":{"kaggle":{"accelerator":"gpu","dataSources":[{"datasetId":5419152,"sourceId":9104825,"sourceType":"datasetVersion"},{"datasetId":5544565,"sourceId":9174472,"sourceType":"datasetVersion"},{"datasetId":5546822,"sourceId":9177797,"sourceType":"datasetVersion"},{"datasetId":5527817,"sourceId":9194009,"sourceType":"datasetVersion"},{"datasetId":5559645,"sourceId":9196288,"sourceType":"datasetVersion"},{"datasetId":5560892,"sourceId":9198028,"sourceType":"datasetVersion"},{"datasetId":5560896,"sourceId":9198035,"sourceType":"datasetVersion"},{"datasetId":5560904,"sourceId":9198045,"sourceType":"datasetVersion"},{"datasetId":5566438,"sourceId":9206254,"sourceType":"datasetVersion"},{"datasetId":5592996,"sourceId":9245526,"sourceType":"datasetVersion"},{"datasetId":5596284,"sourceId":9250376,"sourceType":"datasetVersion"},{"datasetId":5603809,"sourceId":9261202,"sourceType":"datasetVersion"},{"datasetId":5603815,"sourceId":9261210,"sourceType":"datasetVersion"},{"datasetId":5628994,"sourceId":9297219,"sourceType":"datasetVersion"},{"datasetId":5628996,"sourceId":9297222,"sourceType":"datasetVersion"},{"datasetId":5628998,"sourceId":9297227,"sourceType":"datasetVersion"},{"datasetId":5628999,"sourceId":9297228,"sourceType":"datasetVersion"},{"datasetId":5629001,"sourceId":9297232,"sourceType":"datasetVersion"},{"datasetId":5629005,"sourceId":9297237,"sourceType":"datasetVersion"},{"datasetId":5670920,"sourceId":9354642,"sourceType":"datasetVersion"},{"datasetId":5673838,"sourceId":9358533,"sourceType":"datasetVersion"},{"datasetId":5673878,"sourceId":9358581,"sourceType":"datasetVersion"},{"datasetId":5676378,"sourceId":9361789,"sourceType":"datasetVersion"},{"datasetId":5676476,"sourceId":9361942,"sourceType":"datasetVersion"},{"datasetId":5680088,"so
urceId":9366638,"sourceType":"datasetVersion"},{"datasetId":5681041,"sourceId":9367903,"sourceType":"datasetVersion"},{"datasetId":5707886,"sourceId":9402486,"sourceType":"datasetVersion"},{"datasetId":5708526,"sourceId":9403279,"sourceType":"datasetVersion"},{"datasetId":5708753,"sourceId":9403553,"sourceType":"datasetVersion"},{"datasetId":5720522,"sourceId":9418762,"sourceType":"datasetVersion"},{"datasetId":5749118,"sourceId":9457179,"sourceType":"datasetVersion"},{"datasetId":5749126,"sourceId":9457191,"sourceType":"datasetVersion"},{"datasetId":5752981,"sourceId":9462317,"sourceType":"datasetVersion"},{"datasetId":5753388,"sourceId":9462832,"sourceType":"datasetVersion"},{"modelId":103985,"modelInstanceId":79512,"sourceId":94818,"sourceType":"modelInstanceVersion"},{"modelId":104098,"modelInstanceId":79617,"sourceId":94938,"sourceType":"modelInstanceVersion"},{"modelId":106026,"modelInstanceId":81700,"sourceId":97385,"sourceType":"modelInstanceVersion"},{"modelId":106655,"modelInstanceId":82335,"sourceId":98147,"sourceType":"modelInstanceVersion"},{"modelId":107006,"modelInstanceId":82700,"sourceId":98573,"sourceType":"modelInstanceVersion"},{"modelId":107017,"modelInstanceId":82711,"sourceId":98585,"sourceType":"modelInstanceVersion"},{"modelId":108993,"modelInstanceId":84758,"sourceId":101069,"sourceType":"modelInstanceVersion"},{"isSourceIdPinned":true,"modelId":109445,"modelInstanceId":85225,"sourceId":101650,"sourceType":"modelInstanceVersion"},{"isSourceIdPinned":true,"modelId":117231,"modelInstanceId":93025,"sourceId":111042,"sourceType":"modelInstanceVersion"},{"modelId":121705,"modelInstanceId":97518,"sourceId":116074,"sourceType":"modelInstanceVersion"},{"isSourceIdPinned":true,"modelId":124007,"modelInstanceId":99834,"sourceId":118695,"sourceType":"modelInstanceVersion"},{"isSourceIdPinned":true,"modelId":124376,"modelInstanceId":100207,"sourceId":119159,"sourceType":"modelInstanceVersion"}],"dockerImageVersionId":30747,"isGpuEnabled":true,"isInternetEnabled":true,"language":"python","sourceType":"notebook"},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.13"}},"nbformat":4,"nbformat_minor":4}
diff --git a/tasks/line _execution_counting/lineCounter.py b/tasks/line _execution_counting/lineCounter.py
new file mode 100644
index 0000000..7964dbd
--- /dev/null
+++ b/tasks/line _execution_counting/lineCounter.py
@@ -0,0 +1,49 @@
+import sys
+from io import StringIO
+from contextlib import redirect_stdout
+
+def line_counter(code_snippet):
+ """
+ Count how many lines of code are executed in total when the given snippet runs.
+ The counting follows these rules:
+ - a line is not counted if:
+   - it falls inside a conditional block whose condition is not satisfied
+   - it falls inside a loop that runs for zero iterations
+ - a line is counted once per execution (e.g. once per iteration when it sits inside a for-loop body)
+ """
+ counter = 0
+
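+ # trace_lines is installed as the global trace function: it receives a 'call'
+ # event for each new frame and, by returning itself, every 'line' event executed inside that frame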
+ def trace_lines(frame, event, arg):
+ nonlocal counter # declaring the outer variable
+ if event == 'line': # every time the tracer detects the execution of a line of code
+ filename = frame.f_code.co_filename
+ if filename == '<string>': # count only lines belonging to the snippet we provided, not lines from internal libraries
+ counter += 1 # increment the nonlocal counter
+ return trace_lines
+
+
+ # Set the trace function
+ sys.settrace(trace_lines)
+
+ # Capture the output of the program.
+ SIO = StringIO()
+ with redirect_stdout(SIO):
+ # execute the code; execution is traced by the trace_lines() function set above
+ exec(code_snippet, {'__file__': '<string>'}) # exec'd strings are compiled with the pseudo-filename '<string>', which is how trace_lines() recognises this snippet
+
+ # Disable the trace function
+ sys.settrace(None)
+
+ return counter
+
+code_snippet = """e = 6
+e = 0
+e = 7
+if not (e != e) or ( e <= e) :
+ print(e)
+else :
+ print(e)"""
+number = line_counter(code_snippet)
+print(f"\n{number} lines executed successfully\n")
\ No newline at end of file
diff --git a/tasks/line _execution_counting/prepare.py b/tasks/line _execution_counting/prepare.py
new file mode 100644
index 0000000..52612cd
--- /dev/null
+++ b/tasks/line _execution_counting/prepare.py
@@ -0,0 +1,84 @@
+import os
+import pickle
+import requests
+import numpy as np
+
+# change this to the name of the file containing the entire dataset; it is split below into train, val, and test data
+input_file_path = os.path.join(os.path.dirname(__file__), 'fileNameHere.txt' )
+
+with open(input_file_path, 'r') as f:
+ data = f.read()
+print(f"length of dataset in characters: {len(data):,}\n")
+
+
+# get all the unique characters that occur in this text
+chars = sorted(list(set(data)))
+vocab_size = len(chars)
+print("all the unique characters:", ''.join(chars))
+print(f"vocab size: {vocab_size:,}")
+
+# create a mapping from characters to integers
+stoi = { ch:i for i,ch in enumerate(chars) }
+itos = { i:ch for i,ch in enumerate(chars) }
+def encode(s):
+ return [stoi[c] for c in s] # encoder: take a string, output a list of integers
+def decode(l):
+ return ''.join([itos[i] for i in l]) # decoder: take a list of integers, output a string
+
+
+# save the meta information as well, to help us encode/decode later
+meta = {
+ 'vocab_size': vocab_size,
+ 'itos': itos,
+ 'stoi': stoi,
+}
+with open(f'meta.pkl', 'wb') as f:
+ pickle.dump(meta, f)
+
+
+# split by examples using "\n\n"
+examples = data.split("\n\n")[:-1]
+n = len(examples)
+print(f"total number of examples: {n:,}\n")
+# shuffle the examples
+np.random.shuffle(examples)
+
+# split into train, val, and test sets
+train_examples = examples[:int(n*0.8)]
+val_examples = examples[int(n*0.8):int(n*0.9)]
+test_examples = examples[int(n*0.9):]
+
+# join the examples back into strings
+train_data = "\n\n".join(train_examples)
+val_data = "\n\n".join(val_examples)
+test_data = "\n\n".join(test_examples)
+
+
+
+# Save train, val, and test sets to separate files
+with open(os.path.join(os.path.dirname(__file__), 'train.txt'), 'w') as f:
+ f.write(train_data)
+with open(os.path.join(os.path.dirname(__file__), 'val.txt'), 'w') as f:
+ f.write(val_data)
+with open(os.path.join(os.path.dirname(__file__), 'test.txt'), 'w') as f:
+ f.write(test_data)
+
+
+
+
+# encode both to integers
+train_ids = encode(train_data)
+val_ids = encode(val_data)
+test_ids = encode(test_data)
+print(f"train has {len(train_ids):,} tokens for {len(train_examples):,} examples")
+print(f"val has {len(val_ids):,} tokens for {len(val_examples):,} examples")
+print(f"test has {len(test_ids):,} tokens for {len(test_examples):,} examples\n")
+
+# export to bin files
+train_ids = np.array(train_ids, dtype=np.uint16)
+val_ids = np.array(val_ids, dtype=np.uint16)
+test_ids = np.array(test_ids, dtype=np.uint16)
+train_ids.tofile(os.path.join(os.path.dirname(__file__), 'train.bin'))
+val_ids.tofile(os.path.join(os.path.dirname(__file__), 'val.bin'))
+test_ids.tofile(os.path.join(os.path.dirname(__file__), 'test.bin'))
+
diff --git a/tasks/line _execution_counting/tinypy_generator.py b/tasks/line _execution_counting/tinypy_generator.py
new file mode 100644
index 0000000..087c76d
--- /dev/null
+++ b/tasks/line _execution_counting/tinypy_generator.py
@@ -0,0 +1,360 @@
+from anytree import Node, RenderTree
+import random
+from io import StringIO
+from contextlib import redirect_stdout
+import argparse
+import time
+from tqdm.auto import tqdm
+import hashlib
+import os
+import psutil
+import sys
+
+
+class CodeGenerator:
+ def __init__(self):
+ """
+ Initialize the CodeGenerator object with the given context-free grammar rules.
+
+ """
+
+ self.init_count = 0
+ self.max_init = 0
+ # Dictionary containing context-free grammar rules.
+ self.cfg_rules = {
+ # Variables and digits
+ "VARIABLE": ["a", "b", "c", "d", "e"],
+ "DIGIT": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],
+
+ # Operators
+ "ARITHMETIC_OPERATOR": ["+", "-", "*", "/"],
+ "RELATIONAL_OPERATOR": ["<", ">", "<=", ">=", "!=", "=="],
+ "LOGICAL_OPERATOR_INFIX": ["and", "or"],
+ "LOGICAL_OPERATOR_PREFIX": ["not"],
+ "LOGICAL_OPERATOR": ["LOGICAL_OPERATOR_INFIX", "LOGICAL_OPERATOR_PREFIX"],
+ "OPERATOR": ["ARITHMETIC_OPERATOR"],
+
+ # Formatting
+ "NEW_LINE": ["\n"],
+ "TAB_INDENT": ["\t"],
+ "BRACKET_OPEN": ['('],
+ "BRACKET_CLOSE": [')'],
+ "EQUALS": ["="],
+ "COLON": [":"],
+ "COMMA": [","],
+
+
+ # Keywords
+ "IF": ["if"],
+ "ELIF": ["elif"],
+ "ELSE": ["else"],
+ "FOR": ["for"],
+ "IN": ["in"],
+ "RANGE": ["range"],
+ "WHILE": ["while"],
+ "PRINT": ["print"],
+
+ # Terms and expressions
+ "TERM": ["EXPRESSION_IDENTIFIER", "DIGIT"],
+ "EXPRESSION": ["TERM SPACE OPERATOR SPACE TERM"],
+ "ENCLOSED_EXPRESSION": ["BRACKET_OPEN EXPRESSION BRACKET_CLOSE"],
+ "DISPLAY_EXPRESSION": ["EXPRESSION_IDENTIFIER SPACE OPERATOR SPACE EXPRESSION_IDENTIFIER" ,
+ "EXPRESSION_IDENTIFIER SPACE OPERATOR SPACE DIGIT"],
+
+ # Initializations and assignments
+ "IDENTIFIER_INITIALIZATION": ["IDENTIFIER_INITIALIZATION INITIALIZATION",
+ "INITIALIZATION"],
+
+ "INITIALIZATION": ["VARIABLE SPACE EQUALS SPACE DIGIT NEW_LINE"],
+
+ "SIMPLE_ASSIGNMENTS": ["VARIABLE SPACE EQUALS SPACE EXPRESSION NEW_LINE" , ""],
+ "ADVANCED_ASSIGNMENTS": ["VARIABLE SPACE EQUALS SPACE SIMPLE_ARITHMETIC_EVALUATION NEW_LINE",
+ "VARIABLE SPACE EQUALS SPACE EXPRESSION NEW_LINE" ,
+ ""],
+
+ "SIMPLE_ARITHMETIC_EVALUATION": ["SIMPLE_ARITHMETIC_EVALUATION ARITHMETIC_OPERATOR ENCLOSED_EXPRESSION",
+ "ENCLOSED_EXPRESSION",
+ ],
+
+ # Conditions
+ "SIMPLE_IF_STATEMENT": ["IF SPACE CONDITION SPACE COLON NEW_LINE"],
+ "ADVANCED_IF_STATEMENT": ["IF SPACE CHAIN_CONDITION SPACE COLON NEW_LINE"],
+ "SIMPLE_ELIF_STATEMENT": ["ELIF SPACE CONDITION SPACE COLON NEW_LINE"],
+ "ADVANCED_ELIF_STATEMENT": ["ELIF SPACE CHAIN_CONDITION SPACE COLON NEW_LINE"],
+ "ELSE_STATEMENT": ["ELSE SPACE COLON NEW_LINE"],
+
+ "CHAIN_CONDITION": ["CHAIN_CONDITION SPACE LOGICAL_OPERATOR_INFIX SPACE ENCLOSED_CONDITION",
+ "LOGICAL_OPERATOR_PREFIX SPACE ENCLOSED_CONDITION",
+ "ENCLOSED_CONDITION"],
+ "ENCLOSED_CONDITION": ["BRACKET_OPEN CONDITION BRACKET_CLOSE"],
+ "CONDITION": ["OPTIONAL_NOT CONDITION_EXPRESSION", "CONDITION_EXPRESSION"],
+ "CONDITION_EXPRESSION": ["EXPRESSION_IDENTIFIER SPACE RELATIONAL_OPERATOR SPACE EXPRESSION_IDENTIFIER",
+ "EXPRESSION_IDENTIFIER SPACE RELATIONAL_OPERATOR SPACE DIGIT"],
+ "OPTIONAL_NOT": ["LOGICAL_OPERATOR_PREFIX SPACE", "SPACE"],
+
+ # Loops
+ "FOR_HEADER": ["FOR SPACE EXPRESSION_IDENTIFIER SPACE IN SPACE RANGE BRACKET_OPEN INITIAL COMMA SPACE FINAL COMMA SPACE STEP BRACKET_CLOSE SPACE COLON",
+ "FOR SPACE EXPRESSION_IDENTIFIER SPACE IN SPACE RANGE BRACKET_OPEN INITIAL COMMA SPACE FINAL BRACKET_CLOSE SPACE COLON"],
+ "INITIAL": ["DIGIT"],
+ "FINAL": ["STEP * EXECUTION_COUNT + INITIAL - 1"],
+ "STEP": ["1", "2", "3"],
+ "EXECUTION_COUNT": [ "2", "3"],
+ "FOR_LOOP": ["FOR_HEADER NEW_LINE TAB_INDENT DISPLAY"],
+ "ADVANCED_FOR_LOOP": ["FOR_LOOP",
+ "FOR_HEADER NEW_LINE TAB_INDENT ADVANCED_DISPLAY"],
+
+
+ # Displaying
+ "DISPLAY" : ["PRINT BRACKET_OPEN DISPLAY_IDENTIFIER BRACKET_CLOSE"],
+ "ADVANCED_DISPLAY" : ["DISPLAY",
+ "PRINT BRACKET_OPEN DISPLAY_EXPRESSION BRACKET_CLOSE"],
+
+
+ "LEVEL1.1": ["IDENTIFIER_INITIALIZATION SIMPLE_ASSIGNMENTS ADVANCED_DISPLAY"],
+ "LEVEL1.2": ["IDENTIFIER_INITIALIZATION ADVANCED_ASSIGNMENTS ADVANCED_DISPLAY"],
+ "LEVEL2.1": ["IDENTIFIER_INITIALIZATION SIMPLE_IF_STATEMENT TAB_INDENT DISPLAY",
+ "IDENTIFIER_INITIALIZATION SIMPLE_IF_STATEMENT TAB_INDENT DISPLAY NEW_LINE SIMPLE_ELIF_STATEMENT TAB_INDENT DISPLAY NEW_LINE ELSE_STATEMENT TAB_INDENT DISPLAY",
+ "IDENTIFIER_INITIALIZATION SIMPLE_IF_STATEMENT TAB_INDENT DISPLAY NEW_LINE ELSE_STATEMENT TAB_INDENT DISPLAY"],
+ "LEVEL2.2": ["IDENTIFIER_INITIALIZATION ADVANCED_ASSIGNMENTS ADVANCED_IF_STATEMENT TAB_INDENT ADVANCED_DISPLAY",
+ "IDENTIFIER_INITIALIZATION ADVANCED_ASSIGNMENTS ADVANCED_IF_STATEMENT TAB_INDENT ADVANCED_DISPLAY NEW_LINE ADVANCED_ELIF_STATEMENT TAB_INDENT ADVANCED_DISPLAY NEW_LINE ELSE_STATEMENT TAB_INDENT ADVANCED_DISPLAY",
+ "IDENTIFIER_INITIALIZATION ADVANCED_ASSIGNMENTS ADVANCED_IF_STATEMENT TAB_INDENT ADVANCED_DISPLAY NEW_LINE ELSE_STATEMENT TAB_INDENT ADVANCED_DISPLAY"],
+ "LEVEL3.1": ["IDENTIFIER_INITIALIZATION FOR_LOOP"],
+ "LEVEL3.2": ["IDENTIFIER_INITIALIZATION ADVANCED_ASSIGNMENTS ADVANCED_FOR_LOOP"],
+
+ "ALL": ["LEVEL1.1", "LEVEL1.2","LEVEL2.1", "LEVEL2.2","LEVEL3.1", "LEVEL3.2"],
+
+ }
+
+ def line_counter(self,code_snippet):
+ """
+ Count how many lines of code are executed in total when the given snippet runs.
+ The counting follows these rules:
+ - a line is not counted if:
+   - it falls inside a conditional block whose condition is not satisfied
+   - it falls inside a loop that runs for zero iterations
+ - a line is counted once per execution (e.g. once per iteration when it sits inside a for-loop body)
+ """
+ counter = 0
+
+ def trace_lines(frame, event, arg):
+ nonlocal counter # declaring the outer variable
+ if event == 'line': # every time the tracer detects the execution of a line of code
+ filename = frame.f_code.co_filename
+ if filename == '<string>': # count only lines belonging to the snippet we provided, not lines from internal libraries
+ counter += 1 # increment the nonlocal counter
+ return trace_lines
+
+
+ # Set the trace function
+ sys.settrace(trace_lines)
+
+ # Capture the output of the program.
+ SIO = StringIO()
+ with redirect_stdout(SIO):
+            # execute the code; execution is traced by the trace_lines() function set above
+            exec(code_snippet, {})  # fresh globals per snippet; string-compiled code gets co_filename '<string>', which trace_lines() matches
+
+ # Disable the trace function
+ sys.settrace(None)
+
+ return counter
+
+ def generate_code(self, symbol, assigned_identifiers, last_variable, parent=None):
+ """
+ Generate code recursively based on the context-free grammar rules.
+
+ Parameters:
+ - symbol (str): The symbol to generate code for.
+ - assigned_identifiers (set): Set of assigned identifiers.
+ - last_variable (set): Set of the last used variables.
+ - parent (Node): Parent node in the syntax tree.
+
+ Returns:
+ - str: The generated code.
+ """
+ node = Node(symbol, parent=parent)
+
+ # If the symbol is a non-terminal, expand it using the CFG rules.
+ if symbol in self.cfg_rules:
+ # Initialization count.
+ if symbol == "IDENTIFIER_INITIALIZATION":
+ if self.init_count < self.max_init:
+ self.init_count += 1
+ else:
+ symbol = "INITIALIZATION"
+ # Choose a random rule for the symbol and split it into individual symbols.
+ rule = random.choice(self.cfg_rules[symbol])
+ symbols = rule.split(" ")
+
+ # Recursively generate code for each symbol in the rule.
+ generated_symbols = [self.generate_code(s, assigned_identifiers, last_variable, node) for s in symbols]
+
+ # Handle special case for "FINAL" symbol where we need to evaluate an expression.
+ if symbol == "FINAL":
+ return str(eval(''.join(generated_symbols)))
+
+ # Add initialized variables to the assigned identifiers set.
+ if symbol == "INITIALIZATION":
+ assigned_identifiers.add(generated_symbols[0])
+
+ # Keep track of the last used variables for assignments.
+ if (symbol == "SIMPLE_ASSIGNMENTS") or (symbol == "ADVANCED_ASSIGNMENTS"):
+ if generated_symbols[0]:
+ last_variable.add(generated_symbols[0])
+
+ return ''.join(generated_symbols)
+
+ # Handle the terminal symbols.
+ elif symbol == "EXPRESSION_IDENTIFIER":
+ identifier = random.choice(tuple(assigned_identifiers)) if assigned_identifiers else random.choice(self.cfg_rules["DIGIT"])
+ return identifier
+
+ elif symbol == "DISPLAY_IDENTIFIER":
+ try:
+ return f"{tuple(last_variable)[0]}"
+            except IndexError:  # no assignment happened; fall back to an initialized identifier
+                return f"{random.choice(tuple(assigned_identifiers))}"
+ else:
+ return symbol
+
+ def print_tree(self, root):
+ """
+ Print the syntax tree using the RenderTree utility from the anytree module.
+
+ Parameters:
+ - root (Node): The root node of the syntax tree.
+ """
+ for pre, _, node in RenderTree(root):
+ print(f"{pre}{node.name}")
+
+ def generate_program(self, level):
+ """
+ Generate a program based on the specified level.
+
+ Parameters:
+ - level (str): The level of the program.
+
+ Returns:
+ - Tuple[Node, str]: The syntax tree root node and the generated program.
+ """
+ assigned = set()
+ last_variable = set()
+ root = Node("ROOT")
+
+ # Set the maximum number of initializations based on the level.
+ self.init_count = 0
+ if level == "1.1":
+ self.max_init = 1
+ elif level == "1.2":
+ self.max_init = 3
+ elif level == "3.1":
+ self.max_init = 2
+ elif level == "3.2":
+ self.max_init = 4
+ else:
+ self.max_init = 5
+
+ # Choose a rule for the specified level and generate code.
+        if level == "ALL":
+            level_passed = level
+        else:
+            level_passed = "LEVEL" + level
+
+ program = self.generate_code(level_passed, assigned, last_variable, root)
+
+ return root, program.replace("SPACE", " ")
+
+ def memory_usage(self):
+ """
+ Get the current memory usage of the process.
+
+ Returns:
+ - int: The memory usage in bytes.
+ """
+ process = psutil.Process(os.getpid())
+ mem_info = process.memory_info()
+ return mem_info.rss
+
+ def generate_and_write_programs(self, num_programs, level, filename='data.txt', deduplicate=True):
+ """
+ Generate and write a specified number of programs to a file.
+
+ Parameters:
+ - num_programs (int): Number of programs to generate and write.
+ - level (str): The level of the programs.
+ - filename (str): Name of the file to write the programs (default is 'data.txt').
+ - deduplicate (bool, optional): Whether to perform deduplication of generated programs (default is True).
+ """
+ start_time = time.time() # Track the start time for performance measurement.
+ start_mem = self.memory_usage() # Track the initial memory usage.
+ max_tries = 1000 # Set the maximum number of tries for deduplication.
+ num_tries = 0 # Initialize the number of tries counter.
+
+ with open(filename, 'w') as file:
+
+ generated_programs = 0 # Initialize the counter for generated programs.
+ hashes = set() # Set to keep track of unique program hashes for deduplication.
+ pbar = tqdm(desc="Generation", total=num_programs)
+
+ while generated_programs < num_programs:
+ try:
+ root, program = self.generate_program(level) # Generate a program.
+
+                    count = self.line_counter(program)  # count the number of executed lines
+
+                    result = f"""# Snippet\n{program}\n# count\n# {count}"""  # pair the snippet with its "count" label
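+                    # a written entry therefore looks like (hypothetical snippet):
+                    #     # Snippet
+                    #     x = 2
+                    #     print(x)
+                    #     # count
+                    #     # 2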
+
+ program_hash = hashlib.sha256(result.encode('utf-8')).hexdigest()
+
+ if deduplicate:
+ if program_hash not in hashes:
+ hashes.add(program_hash) # Add the hash to the set if it's unique.
+ file.write(result + '\n\n') # Write the program to the file.
+ generated_programs += 1 # Increment the counter for generated programs.
+ pbar.update(1)
+ num_tries = 0 # Reset the tries counter.
+ else:
+ num_tries += 1 # Increment the tries counter.
+ if num_tries >= max_tries:
+ print("Hit max tries in deduplication, stopping generation.")
+ break # Stop generation if max tries are reached.
+ else:
+
+ file.write(result + '\n\n') # Write the program to the file without deduplication.
+ generated_programs += 1 # Increment the counter for generated programs.
+ pbar.update(1)
+
+                except Exception:
+                    continue  # skip snippets that fail to execute (e.g., division by zero)
+
+
+ pbar.close()
+ end_time = time.time() # Track the end time for performance measurement.
+ end_mem = self.memory_usage() # Track the final memory usage.
+ deduplication_info = "with deduplication" if deduplicate else "without deduplication"
+ print(f"Code generation completed in {end_time - start_time:.2f} seconds.")
+ print(f"Memory used during code generation: {end_mem - start_mem} bytes")
+ print(f"Generated {generated_programs} {'unique ' if deduplicate else ''}programs {deduplication_info}.")
+ print(f"Programs are saved to {filename}.")
+
+
+def main():
+ parser = argparse.ArgumentParser(description='Generate and write programs based on a specified level. ')
+ parser.add_argument('--num_programs', type=int, default=1000, help='Number of programs to generate and write (default is 1000)')
+ parser.add_argument('--level', default="ALL", help='The level of the programs (1.1, 1.2, 2.1, 2.2, 3.1, 3.2, ALL)')
+ parser.add_argument('--filename', default='data/data.txt', help='Name of the file to write the programs (default is data/data.txt)')
+    parser.add_argument('--deduplicate', action=argparse.BooleanOptionalAction, default=True, help='Perform deduplication of generated programs (default is True; pass --no-deduplicate to disable)')
+
+ args = parser.parse_args()
+
+ valid_levels = ["1.1", "1.2", "2.1", "2.2", "3.1", "3.2", "ALL"]
+ if args.level not in valid_levels:
+ print(f"Error: Invalid level '{args.level}'. Please choose from {', '.join(valid_levels)}.")
+ return
+ code_generator = CodeGenerator()
+ code_generator.generate_and_write_programs(num_programs=args.num_programs, level=args.level, filename=args.filename, deduplicate=args.deduplicate)
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/tasks/operator_prediction/README.md b/tasks/operator_prediction/README.md
new file mode 100644
index 0000000..07227b7
--- /dev/null
+++ b/tasks/operator_prediction/README.md
@@ -0,0 +1,31 @@
+# Operator Prediction Task
+The task requires the model to predict a randomly masked operator in a code snippet, given the snippet and its output.
+
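+For illustration, an entry produced by `replacer.py` has the following shape (hypothetical snippet): the masked operator is replaced by `#`, the snippet's output follows `# output`, and the label follows `# operator`:
+
+```
+a = 5
+b = 3
+c = a # b
+print(c)
+# output
+# 8
+# operator
+# +
+```
+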
+## Usage
+
+### Data Generation
+- Data should be generated based on an existing dataset of code snippets with their outputs, as follows:
+
+```bash
+python replacer.py --input_file_name input_data.txt --output_file_name output_data.txt
+```
+
+
+- If you want to generate a dataset from scratch, based on the legacy data generation levels, you can run, for example:
+
+```bash
+python old_generator.py --num_programs 1000 --level ALL --filename output_data.txt --deduplicate
+```
+
+
+### Data Preparation
+- Prepare (tokenize and split) the data by running:
+
+```bash
+python prepare.py
+```
+
+This should generate the following files: `train.txt`, `val.txt`, `test.txt`, `train.bin`, `val.bin`, `test.bin`, and `meta.pkl`.
+
+## Contact
+- **Omar Farouk Zouak**: [omar.zouak@ensia.edu.dz](mailto:omar.zouak@ensia.edu.dz)
\ No newline at end of file
diff --git a/tasks/operator_prediction/prepare.py b/tasks/operator_prediction/prepare.py
new file mode 100644
index 0000000..f42dd09
--- /dev/null
+++ b/tasks/operator_prediction/prepare.py
@@ -0,0 +1,92 @@
+import os
+import pickle
+import numpy as np
+import argparse
+
+
+parser = argparse.ArgumentParser(description='Prepare (tokenize and split) a dataset')
+parser.add_argument('--original_dataset', default='output_data.txt', help='Name of file containing original dataset')
+args = parser.parse_args()
+data_file_name = args.original_dataset
+
+input_file_path = os.path.join(os.path.dirname(__file__), data_file_name)
+
+with open(input_file_path, 'r') as f:
+ data = f.read()
+print(f"length of dataset in characters: {len(data):,}\n")
+
+
+# get all the unique characters that occur in this text
+chars = sorted(list(set(data)))
+vocab_size = len(chars)
+print("all the unique characters:", ''.join(chars))
+print(f"vocab size: {vocab_size:,}")
+
+# create a mapping from characters to integers
+stoi = { ch:i for i,ch in enumerate(chars) }
+itos = { i:ch for i,ch in enumerate(chars) }
+def encode(s):
+ return [stoi[c] for c in s] # encoder: take a string, output a list of integers
+def decode(l):
+    return ''.join([itos[i] for i in l]) # decoder: take a list of integers, output a string
+
+
+# save the meta information as well, to help us encode/decode later
+meta = {
+ 'vocab_size': vocab_size,
+ 'itos': itos,
+ 'stoi': stoi,
+}
+with open('meta.pkl', 'wb') as f:
+ pickle.dump(meta, f)
+
+
+# split by examples using "\n\n"
+examples = data.split("\n\n")[:-1]
+n = len(examples)
+print(f"total number of examples: {n:,}\n")
+# shuffle the examples
+np.random.shuffle(examples)
+
+# split into train, val, and test sets
+train_examples = examples[:int(n*0.8)]
+val_examples = examples[int(n*0.8):int(n*0.9)]
+test_examples = examples[int(n*0.9):]
+
+# join the examples back into strings
+train_data = "\n\n".join(train_examples)
+val_data = "\n\n".join(val_examples)
+test_data = "\n\n".join(test_examples)
+
+
+
+# Save train, val, and test sets to separate files
+with open(os.path.join(os.path.dirname(__file__), 'train.txt'), 'w') as f:
+ f.write(train_data)
+with open(os.path.join(os.path.dirname(__file__), 'val.txt'), 'w') as f:
+ f.write(val_data)
+with open(os.path.join(os.path.dirname(__file__), 'test.txt'), 'w') as f:
+ f.write(test_data)
+
+
+
+
+# encode both to integers
+train_ids = encode(train_data)
+val_ids = encode(val_data)
+test_ids = encode(test_data)
+print(f"train has {len(train_ids):,} tokens for {len(train_examples):,} examples")
+print(f"val has {len(val_ids):,} tokens for {len(val_examples):,} examples")
+print(f"test has {len(test_ids):,} tokens for {len(test_examples):,} examples\n")
+
+# export to bin files
+train_ids = np.array(train_ids, dtype=np.uint16)
+val_ids = np.array(val_ids, dtype=np.uint16)
+test_ids = np.array(test_ids, dtype=np.uint16)
+train_ids.tofile(os.path.join(os.path.dirname(__file__), 'train.bin'))
+val_ids.tofile(os.path.join(os.path.dirname(__file__), 'val.bin'))
+test_ids.tofile(os.path.join(os.path.dirname(__file__), 'test.bin'))
+
diff --git a/tasks/operator_prediction/replacer.py b/tasks/operator_prediction/replacer.py
new file mode 100644
index 0000000..937fbb9
--- /dev/null
+++ b/tasks/operator_prediction/replacer.py
@@ -0,0 +1,117 @@
+import random
+import re
+import signal
+from io import StringIO
+from contextlib import redirect_stdout
+import os
+import pickle
+import numpy as np
+from tqdm.auto import tqdm
+import argparse
+
+def run_code(code):
+ SIO = StringIO()
+ with redirect_stdout(SIO):
+ exec(code)
+ return SIO.getvalue().strip()
+
+def rand_operator(s):
+ indices = [m.start() for m in re.finditer(r'[+\-/*]', s)]
+
+ if len(indices) > 0:
+ return random.choice(indices)
+
+ return -1
+
+def timeout_handler(signum, frame):
+ raise Exception("Code execution exceeded the time limit")
+
+def test_outputs(code, op_index, output, timeout=1e-1):
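+    # an example is usable only if no alternative operator reproduces the same
+    # output, i.e. the masked operator must be uniquely recoverable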
+ operators = {'+', '-', '*', '/'} - {code[op_index]}
+
+ for operator in operators:
+ try:
+ signal.signal(signal.SIGALRM, timeout_handler)
+ signal.setitimer(signal.ITIMER_REAL, timeout) # Timeout in seconds
+
+ updated_code = code[:op_index] + operator + code[op_index+1:]
+ new_output = run_code(updated_code)
+ if new_output == output:
+ return False
+
+ except Exception:
+ continue
+
+ finally:
+ # Disable the timer after execution
+ signal.setitimer(signal.ITIMER_REAL, 0)
+
+ return True
+
+def update_code(code, op_index, output_pure):
+ output = '\n'.join([f'# {line}' if line else f'# ' for line in output_pure.split('\n')])
+
+ operator = code[op_index]
+ code = f"""{code}\n# output\n{output}"""
+
+ code = list(code)
+ code[op_index] = '#'
+
+ updated_code = "".join(code) + f"""\n# operator\n# {operator}"""
+ return updated_code
+
+def replace_operator(code, output):
+    if len(output) <= 1:
+ return code, False
+
+ op_index = rand_operator(code)
+ if op_index < 0:
+ return code, False
+
+    is_success = test_outputs(code, op_index, output)
+    if is_success:
+ updated_code = update_code(code, op_index, output)
+ return updated_code, True
+ else:
+ return code, False
+
+
+parser = argparse.ArgumentParser(description='Create an operator prediction dataset from an output prediction one')
+parser.add_argument('--input_file_name', default='input_data.txt', help='Name of input file')
+parser.add_argument('--output_file_name', default='output_data.txt', help='Name of output file')
+
+args = parser.parse_args()
+input_file_path = args.input_file_name
+output_file_path = args.output_file_name
+
+input_file_path = os.path.join(os.path.dirname(__file__), input_file_path)
+with open(input_file_path, 'r') as f:
+ data = f.read()
+print(f"Length of original dataset in characters: {len(data):,}")
+
+examples = data.split("\n\n")[:-1]
+print(f"Length of original dataset: {len(examples):,}\n")
+
+new_examples = []
+for i in tqdm(range(len(examples))):
+ example = examples[i]
+ code, output = example.split("\n# output\n")
+ output = output.replace("# ", '')
+
+ n_tries = 0
+ while n_tries < 3:
+        result, is_success = replace_operator(code, output)
+        if is_success:
+ break
+ n_tries += 1
+
+ if n_tries < 3:
+ new_examples.append(result)
+
+print(f"Length of new dataset: {len(new_examples):,}")
+print(f"Lengths ratio: {100*len(new_examples)/len(examples):,}%\n") # Experimental ratio: ~73%
+
+new_data = "\n\n".join(new_examples)
+with open(os.path.join(os.path.dirname(__file__), output_file_path), 'w') as f:
+ f.write(new_data)
+print(f"Length of new dataset in characters: {len(new_data):,}\n")
\ No newline at end of file
diff --git a/tasks/test.txt b/tasks/test.txt
new file mode 100644
index 0000000..e69de29
diff --git a/venus.py b/venus.py
new file mode 100644
index 0000000..1ac6b6e
--- /dev/null
+++ b/venus.py
@@ -0,0 +1,165 @@
+import argparse
+import os
+import glob
+
+action_help = """Action, can be:
+1- ds ; to create a new dataset
+2- dp ; to create a new dataprep
+3- xg ; to create a new xperiment group
+4- xp ; to create a new xperiment
+"""
+
+def generate_tags_string(tags):
+ if tags is not None:
+ tags_string = "-"+"-".join(tags.split(" "))
+ else:
+ tags_string = ""
+ return tags_string
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description = "venus manager")
+
+ parser.add_argument("--action", "-a", default = None, help = action_help)
+ parser.add_argument("--tags", "-t", default = None, help = "list of space separated tags for the new artifact")
+ parser.add_argument("--datasetid", "-dsi", default = None, help = "id of the dataset to create the dataprep in")
+    parser.add_argument("--xpgroupid", "-xgi", default = None, help = "id of the xperiment group to create the xperiment in")
+
+ args = parser.parse_args()
+ action = args.action
+
+
+ ## Creating a new dataset
+ if action == "ds":
+
+ # creating the id for the new dataset
+ max_id = -float("inf")
+ for dataset in glob.glob("datasets/*"):
+ if max_id < (id := int(dataset.split("/")[-1].split("-")[1])):
+                max_id = id
+        if max_id == -float("inf"): max_id = 0  # handle the case of no existing datasets
+        new_id = max_id + 1
+
+ # creating the tags_string
+ tags_string = generate_tags_string(args.tags)
+
+ # creating the base folder boilerplate
+ DIR = f"datasets/dataset-{new_id}{tags_string}/"
+ os.makedirs(DIR)
+ with open(DIR+".readme.md", "w") as f:
+ f.write("# DESCRIPTION\n\n")
+ f.write("# OBTENTION\n\n")
+ f.write("# META-DATA\n\n")
+ f.write("# DATA-LOCATION\n\n")
+ os.makedirs(DIR+"data")
+ os.makedirs(DIR+"datapreps")
+
+
+ ## Creating a new dataprep within some dataset
+ elif action == "dp":
+
+ # checking if the user provided the datasetid
+ if args.datasetid is None:
+ print("ERROR: dsi (dataset id) must be passed as argument to create a new dp (dataprep)")
+ exit(-1)
+
+        try:
+            dsi = int(args.datasetid)
+        except ValueError:
+            print("ERROR: dsi (dataset id) must be an integer")
+            exit(-1)
+
+ # resolving the dataset in which we want to create the new dataprep
+ resolved = False
+ for dataset in glob.glob("datasets/*"):
+ if dsi == int(dataset.split("/")[-1].split("-")[1]):
+ resolved = True
+ break
+ if not resolved:
+            print("ERROR: the supplied dsi (dataset id) doesn't exist")
+ exit(-1)
+
+ # creating the id for the new dataprep
+ max_id = -float("inf")
+ for dataprep in glob.glob(dataset+"/datapreps/*"):
+            if max_id < (id := int(dataprep.split("/")[-1].split("-")[1])):
+                max_id = id
+        if max_id == -float("inf"): max_id = 0  # handle the case of no existing datapreps
+        new_id = max_id + 1
+
+ # creating the tags_string
+ tags_string = generate_tags_string(args.tags)
+
+ # creating the base folder boilerplate
+ DIR = dataset+f"/datapreps/dataprep-{new_id}{tags_string}/"
+ os.makedirs(DIR)
+ with open(DIR+".readme.md", "w") as f:
+ f.write("# DESCRIPTION\n\n")
+ f.write("# OBTENTION\n\n")
+ f.write("# META-DATA\n\n")
+ f.write("# DATA-LOCATION\n\n")
+ os.makedirs(DIR+"data")
+
+
+ ## Creating a new xpgroup
+ elif action == "xg":
+
+ # creating the id for the new xpgroup
+ max_id = -float("inf")
+ for xpgroup in glob.glob("xperiments/*"):
+ if max_id < (id := int(xpgroup.split("/")[-1].split("-")[1])):
+ max_id = id
+ if max_id == -float("inf"): max_id = 0
+ new_id = max_id + 1
+
+ # creating the tags_string
+ tags_string = generate_tags_string(args.tags)
+
+ # creating the base folder boilerplate
+ DIR = f"xperiments/xpgroup-{new_id}{tags_string}/"
+ os.makedirs(DIR)
+ with open(DIR+".readme.md", "w") as f:
+ f.write("# DESCRIPTION\n\n")
+
+
+    ## Creating a new xp
+ elif action == "xp":
+
+ # checking if the user provided the xpgroupid
+ if args.xpgroupid is None:
+        print("ERROR: xgi (xpgroup id) must be passed as argument to create a new xp (xperiment)")
+ exit(-1)
+
+        try:
+            xgi = int(args.xpgroupid)
+        except ValueError:
+            print("ERROR: xgi (xpgroup id) must be an integer")
+            exit(-1)
+
+ # resolving the xpgroup on which we want to create the new xp
+ resolved = False
+ for xpgroup in glob.glob("xperiments/*"):
+ if xgi == int(xpgroup.split("/")[-1].split("-")[1]):
+ resolved = True
+ break
+ if not resolved:
+ print("ERROR: the supplied xgi (xpgroup id) doesn't exist")
+ exit(-1)
+
+ # creating the id for the new xp
+ max_id = -float("inf")
+ for xp in glob.glob(xpgroup+"/*"):
+ if max_id < (id := int(xp.split("/")[-1].split("-")[1])):
+ max_id = id
+ if max_id == -float("inf"): max_id = 0
+ new_id = max_id + 1
+
+ # creating the tags_string
+ tags_string = generate_tags_string(args.tags)
+
+ # creating the base directory boilerplate
+ DIR = xpgroup + f"/xp-{new_id}{tags_string}/"
+ os.makedirs(DIR)
+ with open(DIR+".readme.md", "w") as f:
+ f.write("# DESCRIPTION\n\n")
+ f.write("# OBTENTION\n\n")
+ f.write("# META-DATA\n\n")
+ f.write("# MODELS-LOCATION\n\n")
+ os.makedirs(DIR+"train")
+ os.makedirs(DIR+"evals")
+
+ else:
+ print("ERROR: a valid action (ds, dp, xg, xp) must be supplied")
+ exit(-1)
\ No newline at end of file
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-2/.readme.md b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-2/.readme.md
new file mode 100644
index 0000000..93eecb8
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-2/.readme.md
@@ -0,0 +1,6 @@
+# Tags
+- 10M code snippets
+- 10M parameter model
+
+# Description
+We train a 10M parameter model from scratch on the locally generated (legacy workflow) dataprep from dataset-2
\ No newline at end of file
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-2/evals/eval-1/eval-1-atmpt-1-slurm-51582923.out b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-2/evals/eval-1/eval-1-atmpt-1-slurm-51582923.out
new file mode 100644
index 0000000..0792319
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-2/evals/eval-1/eval-1-atmpt-1-slurm-51582923.out
@@ -0,0 +1,3 @@
+/scratch/yb2618/tiny-lm-full-random-mode/ae-2/eval-1-atmpt-1.py:226: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
+ model.load_state_dict(torch.load(DIR+"checkpoints/checkpoint_iter170000_epoch1.27_2024-09-18_20-02.pth"))
+slurmstepd: error: *** JOB 51582923 ON gv009 CANCELLED AT 2024-10-02T22:32:34 ***
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-2/evals/eval-1/eval-1-atmpt-1.log b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-2/evals/eval-1/eval-1-atmpt-1.log
new file mode 100644
index 0000000..8123a39
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-2/evals/eval-1/eval-1-atmpt-1.log
@@ -0,0 +1,20 @@
+|ITERS: 138817 / 20000000 | COMP: 0.69% | RATE: 1.80 it./s | SPD: 0.5558 s/it.| ERT: (127, 18, 28, 0) |
+|hard-accuracy: 31.86% | soft-accuracy: 0.74% |
+
+Imports:
+
+Set the device to GPU if available, otherwise CPU:
+
+Loading the encode and decode functions and setting the vocab_size from the meta object:
+
+Redefining the model of the training:
+
+Loading the model:
+
+Creating and loading the model:
+
+Reading the test data:
+
+Splitting the test data into examples:
+
+Sequential Evaluation loop:
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-2/evals/eval-1/eval-1-atmpt-1.py b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-2/evals/eval-1/eval-1-atmpt-1.py
new file mode 100644
index 0000000..8f63ce1
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-2/evals/eval-1/eval-1-atmpt-1.py
@@ -0,0 +1,336 @@
+#@ EVAL-1-ATMPT-1
+#@ We evaluate the 20M model on the hodhaifa-generated test.txt for the 200M code snippets dataset
+
+## On Greene
+DIR = "/scratch/yb2618/tiny-lm-full-random-mode/ae-2/"
+DDIR = "/scratch/hb3020/training/60M-model-200M-data/training/"
+DDIR2 = "/scratch/hb3020/training/60M-model-200M-data/"
+
+## Logging boilerplate
+log_file = open(DIR+"eval-1-atmpt-1.log","w")
+pbar_recept_string = " " * 200 + "\n"
+log_file.write(pbar_recept_string)
+log_file.write(pbar_recept_string)
+log_file.flush()
+def log(s:str, p_level=None):
+ if p_level == 1:
+ log_file.seek(0,0)
+ log_file.write(pbar_recept_string)
+ log_file.seek(0,0)
+ log_file.write(s)
+ log_file.seek(0,2)
+ elif p_level == 2:
+ log_file.seek(len(pbar_recept_string), 0)
+ log_file.write(pbar_recept_string)
+ log_file.seek(len(pbar_recept_string), 0)
+ log_file.write(s)
+ log_file.seek(0,2)
+ else:
+ if s[0].upper() == s[0]:
+ start = "\n"
+ end = ":"
+ else:
+ start = " --> "
+ end = ""
+ log_file.write(start + s + end + "\n")
+ log_file.flush()
+
+
+## Convert seconds to days, hours, minutes, seconds
+def convert_seconds(seconds:float):
+ # ignoring the sub seconds
+ seconds = int(seconds)
+ days, seconds = divmod(seconds, 86400)
+ hours, seconds = divmod(seconds, 3600)
+ minutes, seconds = divmod(seconds, 60)
+ return (days, hours, minutes, seconds)
+
+
+## Imports
+log("Imports")
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+## Set the device to GPU if available, otherwise CPU
+log("Set the device to GPU if available, otherwise CPU")
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+print(f"Device set to {device}.")
+
+
+## Loading the encode and decode functions and setting the vocab_size from the meta object
+log("Loading the encode and decode functions and setting the vocab_size from the meta object")
+import pickle
+
+with open(DDIR+"meta.pkl", "rb") as f:
+ meta = pickle.load(f)
+stoi = meta["stoi"]
+itos = meta["itos"]
+
+def encode(s):
+ return [stoi[c] for c in s] # encoder: take a string, output a list of integers
+
+def decode(l):
+ return ''.join([itos[i] for i in l]) # decoder: take a list of integers, output a string
+
+vocab_size = meta["vocab_size"]
+
+
+## Redefining the model of the training
+log("Redefining the model of the training")
+block_size = 256 # Maximum context length
+n_embd = 372 # Embedding dimension
+n_head = 6 # Number of attention heads
+n_layer = 6 # Number of transformer blocks
+dropout = 0 # Dropout rate
+batch_size = 64 # Batch size for training
+
+
+## Loading the model
+log("Loading the model")
+class LayerNorm(nn.Module):
+ """ LayerNorm but with an optional bias. PyTorch doesn't support simply bias=False """
+
+ def __init__(self, ndim, bias):
+ super().__init__()
+ self.weight = nn.Parameter(torch.ones(ndim))
+ self.bias = nn.Parameter(torch.zeros(ndim)) if bias else None
+
+ def forward(self, input):
+ return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+
+class Head(nn.Module):
+ """One head of self-attention."""
+
+ def __init__(self, head_size):
+ super().__init__()
+ self.key = nn.Linear(n_embd, head_size, bias=False)
+ self.query = nn.Linear(n_embd, head_size, bias=False)
+ self.value = nn.Linear(n_embd, head_size, bias=False)
+ self.flash = hasattr(torch.nn.functional, 'scaled_dot_product_attention')
+
+ self.dropout = nn.Dropout(dropout)
+
+ def forward(self, x):
+ B,T,C = x.shape
+ k = self.key(x) # (B, T, 16)
+ q = self.query(x) # (B, T, 16)
+ v = self.value(x)
+
+ out = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=None, dropout_p=dropout if self.training else 0, is_causal=True)
+
+ return out
+
+class MultiHeadAttention(nn.Module):
+ """multiple heads of self-attention in parallel."""
+
+ def __init__(self, num_heads, head_size):
+ super().__init__()
+ self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
+ self.proj = nn.Linear(n_embd, n_embd)
+ self.dropout = nn.Dropout(dropout)
+
+ def forward(self, x):
+ out = torch.cat([h(x) for h in self.heads], dim=-1)
+ out = self.dropout(self.proj(out))
+ return out
+
+class FeedForward(nn.Module):
+ """ a simple linear layer followed by a non-linearity."""
+
+ def __init__(self, n_embd):
+ super().__init__()
+ self.net = nn.Sequential(
+ nn.Linear(n_embd, 4 * n_embd, bias=False),
+ nn.GELU(),
+ nn.Linear( 4 * n_embd, n_embd, bias=False),
+ nn.Dropout(dropout),
+ )
+
+ def forward(self, x):
+ return self.net(x)
+
+class Block(nn.Module):
+ """ Transformer block: communication followed by feedforward."""
+
+ def __init__(self, n_embd, n_head):
+ super().__init__()
+ head_size = n_embd // n_head
+ self.sa = MultiHeadAttention(n_head, head_size)
+ self.ffwd = FeedForward(n_embd)
+ self.ln1 = nn.LayerNorm(n_embd, bias=False)
+ self.ln2 = nn.LayerNorm(n_embd, bias=False)
+
+ def forward(self, x):
+ x = x + self.sa(self.ln1(x))
+ x = x + self.ffwd(self.ln2(x))
+ return x
+
+class GPT(nn.Module):
+
+ def __init__(self):
+ super().__init__()
+ # each token directly reads off the logits for the next token from a lookup table
+ self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
+ self.position_embedding_table = nn.Embedding(block_size, n_embd)
+ self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
+ self.ln_f = nn.LayerNorm(n_embd, bias=False)
+ self.lm_head = nn.Linear(n_embd, vocab_size)
+
+ def forward(self, idx, targets=None):
+ B, T = idx.shape
+
+ # idx and targets are both (B,T) tensor of integers
+ tok_emb = self.token_embedding_table(idx) # (B,T,C)
+ pos_emb = self.position_embedding_table(torch.arange(T, device=device)) # (T,C)
+ x = tok_emb + pos_emb # (B,T,C)
+ x = self.blocks(x) # (B,T,C)
+ x = self.ln_f(x) # (B,T,C)
+ logits = self.lm_head(x) # (B,T,vocab_size)
+
+ if targets is None:
+ loss = None
+ else:
+ B, T, C = logits.shape
+ logits = logits.view(B*T, C)
+ targets = targets.view(B*T)
+ loss = F.cross_entropy(logits, targets)
+
+ return logits, loss
+
+ @torch.no_grad()
+ def generate(self, idx, max_new_tokens):
+ # idx is (B, T) array of indices in the current context
+ for _ in range(max_new_tokens):
+ # crop idx to the last block_size tokens
+ idx_cond = idx[:, -block_size:] # (B, T)
+ # get the predictions
+ logits, loss = self(idx_cond)
+ # focus only on the last time step
+ logits = logits[:, -1, :] # becomes (B, C)
+ # apply softmax to get probabilities
+ probs = F.softmax(logits, dim=-1) # (B, C)
+            # greedy decoding: pick the most likely next token (multinomial sampling is commented out below)
+ _, idx_next = torch.max(probs, dim=1, keepdim=True) # (B, 1)
+ #idx_next = torch.multinomial(probs, num_samples=1)
+ # append sampled index to the running sequence
+ idx = torch.cat((idx, idx_next), dim=1) # (B, T+1)
+ return idx
+
+
+## Creating and loading the model
+log("Creating and loading the model")
+model = GPT()
+model.eval()
+model.load_state_dict(torch.load(DIR+"checkpoints/checkpoint_iter170000_epoch1.27_2024-09-18_20-02.pth"))
+model.to(device)
+
+
+## Reading the test data
+log("Reading the test data")
+with open(DDIR2+"test.txt", "r") as f:
+ test_data = f.read()
+
+
+## Splitting the test data into examples
+log("Splitting the test data into examples")
+examples = test_data.split("\n\n")
+examples = examples[:]
+
+
+## Sequential Evaluation loop
+log("Sequential Evaluation loop")
+hard_match_counter = 0
+soft_match_counter = 0
+failures_counter = 0
+
+hard_match_successes = {"example":[], "all-generated-output":[]} # correct generated output + correct stopping (no hallucination) i.e. fully correct
+soft_match_successes = {"example":[], "all-generated-output":[]} # correct initial generated output BUT uncorrect stopping (hallucination)
+failures = {"example":[], "all-generated-output":[]} # completely uncorrect answer
+
+import time
+import pandas as pd
+import os
+
+os.makedirs(os.path.dirname(DIR+"eval-1-atmpt-1-results/"), exist_ok = True)
+
+checkpoint_interval = 5000
+
+hard_match_base = 0
+soft_match_base = 0
+failures_base = 0
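+# these *_base offsets keep the DataFrame indices monotonically increasing
+# across the periodic CSV appends in the loop below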
+
+for i, example in enumerate(examples):
+
+ past = time.time()
+
+ # generating the output of the model
+ example_match = example.split("# output\n")
+ example_prompt = example_match[0] + "# output\n"
+ example_output = example_match[1]
+ example_prompt_tensor = torch.tensor(encode(example_prompt), dtype=torch.long).unsqueeze(0).to(device)
+ generated_example = decode(model.generate(example_prompt_tensor, max_new_tokens = len(example_output) + 20)[0].tolist())
+
+ generated_output = generated_example[len(example_prompt):]
+ example_code_generated_output = generated_output.split("\n\n")[0]
+ # if hard match
+ if example_code_generated_output == example_output:
+ hard_match_counter += 1
+ hard_match_successes["example"].append(example)
+ hard_match_successes["all-generated-output"].append(generated_output+"@")
+ # elif soft checking
+ elif example_code_generated_output[:len(example_output)] == example_output:
+ soft_match_counter += 1
+ soft_match_successes["example"].append(example)
+ soft_match_successes["all-generated-output"].append(generated_output+"@")
+ # else complete failure
+ else:
+ failures_counter += 1
+ failures["example"].append(example)
+ failures["all-generated-output"].append(generated_output+"@")
+
+ present = time.time()
+
+ log(f"|ITERS: {i+1} / {len(examples)} | COMP: {(i+1)/len(examples) * 100:.2f}% | RATE: {1/(present-past):.2f} it./s | SPD: {present - past :.4f} s/it.| ERT: {convert_seconds((len(examples)-i-1) * (present-past))} |", p_level = 1)
+ log(f"|hard-accuracy: {(hard_match_counter/(i+1))*100:.2f}% | soft-accuracy: {(soft_match_counter/(i+1))*100:.2f}% |", p_level = 2)
+
+ if (i+1) % checkpoint_interval == 0:
+
+ mode, header = ("w",True) if (i+1) == checkpoint_interval else ("a", False)
+
+ hard_match_successes_df = pd.DataFrame(hard_match_successes)
+ soft_match_successes_df = pd.DataFrame(soft_match_successes)
+ failures_df = pd.DataFrame(failures)
+
+ hard_match_successes_df.index = hard_match_successes_df.index + hard_match_base
+ soft_match_successes_df.index = soft_match_successes_df.index + soft_match_base
+ failures_df.index = failures_df.index + failures_base
+
+ hard_match_base = hard_match_counter
+ soft_match_base = soft_match_counter
+ failures_base = failures_counter
+
+ hard_match_successes_df.to_csv(DIR+"eval-1-atmpt-1-results/eval-1-atmpt-1-hard-match-successes.csv", mode = mode, header = header)
+ soft_match_successes_df.to_csv(DIR+"eval-1-atmpt-1-results/eval-1-atmpt-1-soft-match-successes.csv", mode = mode, header = header)
+ failures_df.to_csv(DIR+"eval-1-atmpt-1-results/eval-1-atmpt-1-failures.csv", mode = mode, header = header)
+
+ hard_match_successes = {"example":[], "all-generated-output":[]}
+ soft_match_successes = {"example":[], "all-generated-output":[]}
+ failures = {"example":[], "all-generated-output":[]}
+
+
+## Logging the metrics
+log("Logging the metrics")
+import neptune
+
+run = neptune.init_run(
+ project="younes-boukacem-workspace/tiny-lm-full-random-mode",
+ api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiJiZGFmNDg2Yy01MzRkLTQwNGMtYmZmMy1hYzM0Y2NkY2QyNmMifQ==",
+ with_id = "IMG1-72",
+ capture_hardware_metrics = False
+)
+run["eval-1/hard-accuracy-percentage"] = (hard_match_counter/len(examples))*100
+run["eval-1/soft-accuracy-percentage"] = (soft_match_counter/len(examples))*100
+
+log_file.close()
\ No newline at end of file
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-2/runid.txt b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-2/runid.txt
new file mode 100644
index 0000000..8b389a8
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-2/runid.txt
@@ -0,0 +1 @@
+IMG1-15
\ No newline at end of file
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-2/train/model-training-atmpt-1.py b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-2/train/model-training-atmpt-1.py
new file mode 100644
index 0000000..2282d86
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-2/train/model-training-atmpt-1.py
@@ -0,0 +1,353 @@
+# Model training
+
+## ATMPT-1 (on Greene)
+DIR = "/scratch/yb2618/tiny-lm-full-random-mode/"
+DDIR = "/scratch/yb2618/Tiny-Language-Models-Framework/datasets/dataset-2/datapreps/dataprep-1/data/"
+## Logging boilerplate
+log_file = open(DIR+"model-training-atmpt-1.log", "w")
+def log(s:str):
+ if s[0].upper() == s[0]:
+ start = "\n"
+ end = ":"
+ else:
+ start = " --> "
+ end = ""
+ log_file.write(start + s + end + "\n")
+ log_file.flush()
+
+import random
+import os
+import time
+import datetime
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.optim.lr_scheduler import StepLR
+import pandas as pd
+from tqdm import tqdm
+import re
+import numpy as np
+
+## Starting the neptune logging
+log("Starting the neptune logging")
+log("neptune init")
+import neptune
+run = neptune.init_run(
+ project="younes-boukacem-workspace/tiny-lm-full-random-mode",
+ api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiJiZGFmNDg2Yy01MzRkLTQwNGMtYmZmMy1hYzM0Y2NkY2QyNmMifQ==",
+ tags = ["10M-model", "~10M-code-snippets"],
+    description = "We train the 10M parameter model from scratch on dataset-2",
+ capture_hardware_metrics = False,
+)
+# First attempt so we log the runid
+log("saving the runid")
+runid = run["sys/id"].fetch()
+with open(DIR + "runid.txt", "w") as f:
+ f.write(runid)
+
+
+## Set the random seed for reproducibility
+log("Set the random seed for reproducibility")
+seed = 42
+torch.manual_seed(seed)
+random.seed(seed)
+np.random.seed(seed)
+
+
+## Set the device to GPU if available, otherwise CPU
+log("Set the device to GPU if available, otherwise CPU")
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+log(f"device set to {device}.")
+
+
+## Setting arch-hyperparams for the GPT model
+log("Setting arch-hyperparams for the GPT model")
+run["arch-hyperparams/block_size"] = block_size = 256 # Maximum context length
+run["arch-hyperparams/n_embd"] = n_embd = 372 # Embedding dimension
+run["arch-hyperparams/n_head"] = n_head = 6 # Number of attention heads
+run["arch-hyperparams/n_layer"] = n_layer = 6 # Number of transformer blocks
+
+# Loading the training and evaluation data
+log("Loading the training and evaluation data")
+log("train.bin")
+train_data = np.memmap(DDIR+"train.bin", dtype = np.uint16, mode="r")
+log("val.bin")
+val_data = np.memmap(DDIR+"val.bin", dtype = np.uint16, mode="r")
+
+# Setting the training hyper-parameters and util variables
+log("Setting training-hyperparams and util variables")
+run["train-hyperparams/batch_size"] = batch_size = 64 # Batch size for training
+run["train-hyperparams/dropout"] = dropout = 0 # Dropout rate
+run["train-hyperparms/max_pseudo_epochs"] = max_pseudo_epochs = 3
+run["train-hyperparams/learning_rate"] = learning_rate = 1e-3 # Initial Learning rate value
+run["train-hypeparams/max_degradations"] = max_degradations = 3 # number of consecutive degradations on val loss before stoping the training
+eval_interval = 5000 # Evaluation interval
+eval_iters = 500 # Number of iterations for evaluation
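+# each training iteration consumes batch_size * block_size tokens, so the
+# max_iters below corresponds to ~max_pseudo_epochs passes over the training data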
+max_iters = int( ( max_pseudo_epochs * len(train_data) ) / ( batch_size * block_size ) )
+log(f"max_iters = {max_iters}")
+miles = [int(max_iters * m) for m in [0.7, 0.8, 0.9]] # Milestones for learning rate decay as fractions of max_iters
+run["train-hyperparams/miles"] = str(miles)
+
+
+
+compile = False # requires PyTorch 2.0
+
+## Defining the model and utilities
+log("Defining the model and utilities")
+log("The model")
+class LayerNorm(nn.Module):
+ """ LayerNorm but with an optional bias. PyTorch doesn't support simply bias=False """
+
+ def __init__(self, ndim, bias):
+ super().__init__()
+ self.weight = nn.Parameter(torch.ones(ndim))
+ self.bias = nn.Parameter(torch.zeros(ndim)) if bias else None
+
+ def forward(self, input):
+ return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+
+class Head(nn.Module):
+ """One head of self-attention."""
+
+ def __init__(self, head_size):
+ super().__init__()
+ self.key = nn.Linear(n_embd, head_size, bias=False)
+ self.query = nn.Linear(n_embd, head_size, bias=False)
+ self.value = nn.Linear(n_embd, head_size, bias=False)
+ self.flash = hasattr(torch.nn.functional, 'scaled_dot_product_attention')
+
+ self.dropout = nn.Dropout(dropout)
+
+ def forward(self, x):
+ B,T,C = x.shape
+ k = self.key(x) # (B, T, 16)
+ q = self.query(x) # (B, T, 16)
+ v = self.value(x)
+
+ out = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=None, dropout_p=dropout if self.training else 0, is_causal=True)
+
+ return out
+
+class MultiHeadAttention(nn.Module):
+ """multiple heads of self-attention in parallel."""
+
+ def __init__(self, num_heads, head_size):
+ super().__init__()
+ self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
+ self.proj = nn.Linear(n_embd, n_embd)
+ self.dropout = nn.Dropout(dropout)
+
+ def forward(self, x):
+ out = torch.cat([h(x) for h in self.heads], dim=-1)
+ out = self.dropout(self.proj(out))
+ return out
+
+class FeedForward(nn.Module):
+ """ a simple linear layer followed by a non-linearity."""
+
+ def __init__(self, n_embd):
+ super().__init__()
+ self.net = nn.Sequential(
+ nn.Linear(n_embd, 4 * n_embd, bias=False),
+ nn.GELU(),
+ nn.Linear( 4 * n_embd, n_embd, bias=False),
+ nn.Dropout(dropout),
+ )
+
+ def forward(self, x):
+ return self.net(x)
+
+class Block(nn.Module):
+ """ Transformer block: communication followed by feedforward."""
+
+ def __init__(self, n_embd, n_head):
+ super().__init__()
+ head_size = n_embd // n_head
+ self.sa = MultiHeadAttention(n_head, head_size)
+ self.ffwd = FeedForward(n_embd)
+ self.ln1 = nn.LayerNorm(n_embd, bias=False)
+ self.ln2 = nn.LayerNorm(n_embd, bias=False)
+
+ def forward(self, x):
+ x = x + self.sa(self.ln1(x))
+ x = x + self.ffwd(self.ln2(x))
+ return x
+
+class GPT(nn.Module):
+
+ def __init__(self):
+ super().__init__()
+ # each token directly reads off the logits for the next token from a lookup table
+ self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
+ self.position_embedding_table = nn.Embedding(block_size, n_embd)
+ self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
+ self.ln_f = nn.LayerNorm(n_embd, bias=False)
+ self.lm_head = nn.Linear(n_embd, vocab_size)
+
+ def forward(self, idx, targets=None):
+ B, T = idx.shape
+
+ # idx and targets are both (B,T) tensor of integers
+ tok_emb = self.token_embedding_table(idx) # (B,T,C)
+ pos_emb = self.position_embedding_table(torch.arange(T, device=device)) # (T,C)
+ x = tok_emb + pos_emb # (B,T,C)
+ x = self.blocks(x) # (B,T,C)
+ x = self.ln_f(x) # (B,T,C)
+ logits = self.lm_head(x) # (B,T,vocab_size)
+
+ if targets is None:
+ loss = None
+ else:
+ B, T, C = logits.shape
+ logits = logits.view(B*T, C)
+ targets = targets.view(B*T)
+ loss = F.cross_entropy(logits, targets)
+
+ return logits, loss
+
+ def generate(self, idx, max_new_tokens):
+ # idx is (B, T) array of indices in the current context
+ for _ in range(max_new_tokens):
+ # crop idx to the last block_size tokens
+ idx_cond = idx[:, -block_size:] # (B, T)
+ # get the predictions
+ logits, loss = self(idx_cond)
+ # focus only on the last time step
+ logits = logits[:, -1, :] # becomes (B, C)
+ # apply softmax to get probabilities
+ probs = F.softmax(logits, dim=-1) # (B, C)
+            # greedy decoding: pick the most likely next token (multinomial sampling is commented out below)
+ _, idx_next = torch.max(probs, dim=1, keepdim=True) # (B, 1)
+ #idx_next = torch.multinomial(probs, num_samples=1)
+ # append sampled index to the running sequence
+ idx = torch.cat((idx, idx_next), dim=1) # (B, T+1)
+ return idx
+
+# get random batch of data
+log("def get random batch of data")
+def get_batch(split):
+ data = train_data if split == 'train' else val_data
+ ix = torch.randint(len(data) - block_size, (batch_size,))
+ x = torch.stack([torch.from_numpy((data[i:i+block_size]).astype(np.int64)) for i in ix])
+ y = torch.stack([torch.from_numpy((data[i+1:i+1+block_size]).astype(np.int64)) for i in ix])
+ x, y = x.to(device), y.to(device)
+ return x, y
+
+# estimate loss on train and val splits
+log("def estimate loss")
+@torch.no_grad()
+def estimate_loss():
+ out = {}
+ model.eval()
+ for split in ['train', 'val']:
+ losses = torch.zeros(eval_iters)
+ for k in range(eval_iters):
+ X, Y = get_batch(split)
+ logits, loss = model(X, Y)
+ losses[k] = loss.item()
+ out[split] = losses.mean()
+ model.train()
+ return out
+
+# helper function to make large numbers of parameters human-readable
+log("def human readable")
+def human_readable(num):
+ magnitude = 0
+ while abs(num) >= 1000:
+ magnitude += 1
+ num /= 1000.0
+ return '%.0f%s' % (num, ['', 'K', 'M', 'G', 'T', 'P'][magnitude])
+
+
+## Loading the meta object
+log("Loading the meta object")
+log("loading")
+import pickle
+with open(DIR+"meta.pkl", "rb") as f:
+ meta = pickle.load(f)
+log("setting vocab size")
+vocab_size = meta["vocab_size"]
+
+
+## Creating a new model
+log("Creating the model")
+model = GPT()
+m = model.to(device)
+num_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
+if compile:
+ print("compiling the model... (takes a ~minute)")
+ model = torch.compile(model)
+num_parameters_hr = human_readable(num_parameters)
+log(f'The model has {num_parameters_hr} trainable parameters')
+
+
+## Preparing for the training loop
+log("Preparing for the training loop")
+
+# initializing the optimizer
+log("initialiazing the optimizer")
+optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
+
+# initializing the learning rate scheduler
+log("initializing the learing rate scheduler")
+scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=miles, gamma=0.1)
+
+
+# computing the initial loss
+log("computing the initial loss")
+losses = estimate_loss()
+
+# saving the last_losses for early stopping
+log("saving the last loss for early stopping")
+last_losses = losses
+
+# neptune logging the initial loss
+log("neptune logging the initial loss")
+run["losses_trace/train_loss"].append(losses["train"], step = 0)
+run["losses_trace/val_loss"].append(losses["val"], step = 0)
+
+
+## Training loop
+log("Training loop")
+log("==========================================================================================")
+early_stopping = {"state": False, "iter": None, "epoch": None}
+log(f'iter {0:5d} <=> epoch 0 | train loss {losses["train"]:.4f} | val loss {losses["val"]:.4f}')
+nb_degradations = 0
+for iter in range(max_iters):
+ # train the model for one iteration
+ xb, yb = get_batch('train')
+ # forward pass
+ logits, loss = model(xb, yb)
+ optimizer.zero_grad(set_to_none=True)
+ loss.backward()
+ optimizer.step()
+ # Step the scheduler
+ scheduler.step()
+ # evaluate the model on the train and val splits and log the losses
+ if (iter+1) % eval_interval == 0:
+ epoch = (block_size * batch_size * (iter+1))/len(train_data)
+ losses = estimate_loss()
+ log(f'iter {iter+1:5d} <=> epoch {epoch} | train loss {losses["train"]:.4f} | val loss {losses["val"]:.4f}')
+ if losses["val"] > last_losses["val"]:
+ nb_degradations += 1
+ if nb_degradations == max_degradations:
+ print(f"EARLY STOPPING at iter {iter+1} == epoch {epoch:.2f}")
+ early_stopping = {"state": True, "iter": iter+1, "epoch": epoch}
+ break
+ else:
+ nb_degradations = 0
+
+ # Logging the losses trace
+ run["losses_trace/train_loss"].append(losses["train"], step = epoch)
+ run["losses_trace/val_loss"].append(losses["val"], step = epoch)
+
+ # Saving the last_losses
+ last_losses = losses
+
+ # Saving the model
+ now = datetime.datetime.now()
+ date_hour = now.strftime("%Y-%m-%d_%H-%M")
+ torch.save(model.state_dict(), f"{DIR}checkpoint_iter{iter+1}_epoch{epoch:.2f}_{date_hour}.pth")
+
+run["early_stopping"] = early_stopping
+log_file.close()
\ No newline at end of file
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/evals/eval-1/eval-1-atmpt-1-slurm-51542310.out b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/evals/eval-1/eval-1-atmpt-1-slurm-51542310.out
new file mode 100644
index 0000000..4c4241a
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/evals/eval-1/eval-1-atmpt-1-slurm-51542310.out
@@ -0,0 +1,3 @@
+/scratch/yb2618/tiny-lm-full-random-mode/ae-4/eval-1-atmpt-1.py:226: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
+ model.load_state_dict(torch.load(DIR+"best-model.pth"))
+slurmstepd: error: *** JOB 51542310 ON gv008 CANCELLED AT 2024-10-02T02:03:47 ***
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/evals/eval-1/eval-1-atmpt-1.log b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/evals/eval-1/eval-1-atmpt-1.log
new file mode 100644
index 0000000..a9fa020
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/evals/eval-1/eval-1-atmpt-1.log
@@ -0,0 +1,20 @@
+|ITERS: 65034 / 20000000 | COMP: 0.33% | RATE: 1.43 it./s | SPD: 0.7007 s/it.| ERT: (161, 15, 52, 56) |
+|hard-accuracy: 36.35% | soft-accuracy: 0.91% |
+
+Imports:
+
+Set the device to GPU if available, otherwise CPU:
+
+Loading the encode and decode functions and setting the vocab_size from the meta object:
+
+Redefining the model of the training:
+
+Loading the model:
+
+Creating and loading the model:
+
+Reading the test data:
+
+Splitting the test data into examples:
+
+Sequential Evaluation loop:
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/evals/eval-1/eval-1-atmpt-1.py b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/evals/eval-1/eval-1-atmpt-1.py
new file mode 100644
index 0000000..27763ad
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/evals/eval-1/eval-1-atmpt-1.py
@@ -0,0 +1,336 @@
+#@ EVAL-1-ATMPT-1
+#@ We evaluate the 20M model on the hodhaifa-generated test.txt for the 200M code snippets dataset
+
+## On Greene
+DIR = "/scratch/yb2618/tiny-lm-full-random-mode/ae-4/"
+DDIR = "/scratch/hb3020/training/60M-model-200M-data/training/"
+DDIR2 = "/scratch/hb3020/training/60M-model-200M-data/"
+
+## Logging boilerplate
+log_file = open(DIR+"eval-1-atmpt-1.log","w")
+pbar_recept_string = " " * 200 + "\n"
+log_file.write(pbar_recept_string)
+log_file.write(pbar_recept_string)
+log_file.flush()
+def log(s:str, p_level=None):
+ if p_level == 1:
+ log_file.seek(0,0)
+ log_file.write(pbar_recept_string)
+ log_file.seek(0,0)
+ log_file.write(s)
+ log_file.seek(0,2)
+ elif p_level == 2:
+ log_file.seek(len(pbar_recept_string), 0)
+ log_file.write(pbar_recept_string)
+ log_file.seek(len(pbar_recept_string), 0)
+ log_file.write(s)
+ log_file.seek(0,2)
+ else:
+ if s[0].upper() == s[0]:
+ start = "\n"
+ end = ":"
+ else:
+ start = " --> "
+ end = ""
+ log_file.write(start + s + end + "\n")
+ log_file.flush()
+
+
+## Convert seconds to days, hours, minutes, seconds
+def convert_seconds(seconds:float):
+ # ignoring the sub seconds
+ seconds = int(seconds)
+ days, seconds = divmod(seconds, 86400)
+ hours, seconds = divmod(seconds, 3600)
+ minutes, seconds = divmod(seconds, 60)
+ return (days, hours, minutes, seconds)
+
+
+## Imports
+log("Imports")
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+## Set the device to GPU if available, otherwise CPU
+log("Set the device to GPU if available, otherwise CPU")
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+print(f"Device set to {device}.")
+
+
+## Loading the encode and decode functions and setting the vocab_size from the meta object
+log("Loading the encode and decode functions and setting the vocab_size from the meta object")
+import pickle
+
+with open(DDIR+"meta.pkl", "rb") as f:
+ meta = pickle.load(f)
+stoi = meta["stoi"]
+itos = meta["itos"]
+
+def encode(s):
+ return [stoi[c] for c in s] # encoder: take a string, output a list of integers
+
+def decode(l):
+ return ''.join([itos[i] for i in l]) # decoder: take a list of integers, output a string
+
+vocab_size = meta["vocab_size"]
+
+
+## Redefining the model of the training
+log("Redefining the model of the training")
+block_size = 256 # Maximum context length
+n_embd = 528 # Embedding dimension
+n_head = 6 # Number of attention heads
+n_layer = 6 # Number of transformer blocks
+dropout = 0 # Dropout rate
+batch_size = 64 # Batch size for training
+
+
+## Loading the model
+log("Loading the model")
+class LayerNorm(nn.Module):
+ """ LayerNorm but with an optional bias. PyTorch doesn't support simply bias=False """
+
+ def __init__(self, ndim, bias):
+ super().__init__()
+ self.weight = nn.Parameter(torch.ones(ndim))
+ self.bias = nn.Parameter(torch.zeros(ndim)) if bias else None
+
+ def forward(self, input):
+ return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+
+class Head(nn.Module):
+ """One head of self-attention."""
+
+ def __init__(self, head_size):
+ super().__init__()
+ self.key = nn.Linear(n_embd, head_size, bias=False)
+ self.query = nn.Linear(n_embd, head_size, bias=False)
+ self.value = nn.Linear(n_embd, head_size, bias=False)
+ self.flash = hasattr(torch.nn.functional, 'scaled_dot_product_attention')
+
+ self.dropout = nn.Dropout(dropout)
+
+ def forward(self, x):
+ B,T,C = x.shape
+ k = self.key(x) # (B, T, 16)
+ q = self.query(x) # (B, T, 16)
+ v = self.value(x)
+
+ out = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=None, dropout_p=dropout if self.training else 0, is_causal=True)
+
+ return out
+
+class MultiHeadAttention(nn.Module):
+ """multiple heads of self-attention in parallel."""
+
+ def __init__(self, num_heads, head_size):
+ super().__init__()
+ self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
+ self.proj = nn.Linear(n_embd, n_embd)
+ self.dropout = nn.Dropout(dropout)
+
+ def forward(self, x):
+ out = torch.cat([h(x) for h in self.heads], dim=-1)
+ out = self.dropout(self.proj(out))
+ return out
+
+class FeedForward(nn.Module):
+ """ a simple linear layer followed by a non-linearity."""
+
+ def __init__(self, n_embd):
+ super().__init__()
+ self.net = nn.Sequential(
+ nn.Linear(n_embd, 4 * n_embd, bias=False),
+ nn.GELU(),
+ nn.Linear( 4 * n_embd, n_embd, bias=False),
+ nn.Dropout(dropout),
+ )
+
+ def forward(self, x):
+ return self.net(x)
+
+class Block(nn.Module):
+ """ Transformer block: communication followed by feedforward."""
+
+ def __init__(self, n_embd, n_head):
+ super().__init__()
+ head_size = n_embd // n_head
+ self.sa = MultiHeadAttention(n_head, head_size)
+ self.ffwd = FeedForward(n_embd)
+ self.ln1 = nn.LayerNorm(n_embd, bias=False)
+ self.ln2 = nn.LayerNorm(n_embd, bias=False)
+
+ def forward(self, x):
+ x = x + self.sa(self.ln1(x))
+ x = x + self.ffwd(self.ln2(x))
+ return x
+
+class GPT(nn.Module):
+
+ def __init__(self):
+ super().__init__()
+ # each token directly reads off the logits for the next token from a lookup table
+ self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
+ self.position_embedding_table = nn.Embedding(block_size, n_embd)
+ self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
+ self.ln_f = nn.LayerNorm(n_embd, bias=False)
+ self.lm_head = nn.Linear(n_embd, vocab_size)
+
+ def forward(self, idx, targets=None):
+ B, T = idx.shape
+
+ # idx and targets are both (B,T) tensor of integers
+ tok_emb = self.token_embedding_table(idx) # (B,T,C)
+ pos_emb = self.position_embedding_table(torch.arange(T, device=device)) # (T,C)
+ x = tok_emb + pos_emb # (B,T,C)
+ x = self.blocks(x) # (B,T,C)
+ x = self.ln_f(x) # (B,T,C)
+ logits = self.lm_head(x) # (B,T,vocab_size)
+
+ if targets is None:
+ loss = None
+ else:
+ B, T, C = logits.shape
+ logits = logits.view(B*T, C)
+ targets = targets.view(B*T)
+ loss = F.cross_entropy(logits, targets)
+
+ return logits, loss
+
+ @torch.no_grad()
+ def generate(self, idx, max_new_tokens):
+ # idx is (B, T) array of indices in the current context
+ for _ in range(max_new_tokens):
+ # crop idx to the last block_size tokens
+ idx_cond = idx[:, -block_size:] # (B, T)
+ # get the predictions
+ logits, loss = self(idx_cond)
+ # focus only on the last time step
+ logits = logits[:, -1, :] # becomes (B, C)
+ # apply softmax to get probabilities
+ probs = F.softmax(logits, dim=-1) # (B, C)
+ # greedy decoding: pick the most likely next token
+ # (multinomial sampling kept commented out as the stochastic alternative)
+ _, idx_next = torch.max(probs, dim=1, keepdim=True) # (B, 1)
+ #idx_next = torch.multinomial(probs, num_samples=1)
+ # append sampled index to the running sequence
+ idx = torch.cat((idx, idx_next), dim=1) # (B, T+1)
+ return idx
+
+
+## Creating and loading the model
+log("Creating and loading the model")
+model = GPT()
+model.eval()
+model.load_state_dict(torch.load(DIR+"best-model.pth"))
+model.to(device)
+
+
+## Reading the test data
+log("Reading the test data")
+with open(DDIR2+"test.txt", "r") as f:
+ test_data = f.read()
+
+
+## Splitting the test data into examples
+log("Splitting the test data into examples")
+examples = test_data.split("\n\n")
+examples = examples[:] # no-op slice, kept as a hook for evaluating only a subset
+
+
+## Sequential Evaluation loop
+log("Sequential Evaluation loop")
+hard_match_counter = 0
+soft_match_counter = 0
+failures_counter = 0
+
+hard_match_successes = {"example":[], "all-generated-output":[]} # correct generated output + correct stopping (no hallucination), i.e. fully correct
+soft_match_successes = {"example":[], "all-generated-output":[]} # correct initial generated output BUT incorrect stopping (hallucination)
+failures = {"example":[], "all-generated-output":[]} # completely incorrect answer
+
+import time
+import pandas as pd
+import os
+
+os.makedirs(DIR+"eval-1-atmpt-1-results/", exist_ok = True)
+
+checkpoint_interval = 5000
+
+hard_match_base = 0
+soft_match_base = 0
+failures_base = 0
+
+for i, example in enumerate(examples):
+
+ past = time.time()
+
+ # generating the output of the model
+ example_match = example.split("# output\n")
+ example_prompt = example_match[0] + "# output\n"
+ example_output = example_match[1]
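+ # each test example has the form "<code># output\n<expected output>"; the model is
+ # prompted with everything up to and including the marker and must generate the rest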
+ example_prompt_tensor = torch.tensor(encode(example_prompt), dtype=torch.long).unsqueeze(0).to(device)
+ generated_example = decode(model.generate(example_prompt_tensor, max_new_tokens = len(example_output) + 20)[0].tolist())
+
+ generated_output = generated_example[len(example_prompt):]
+ example_code_generated_output = generated_output.split("\n\n")[0]
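+ # the generation is truncated at the first blank line, the example separator; the "@"
+ # appended below marks the end of the raw generation in the saved CSVs (presumably to
+ # make trailing whitespace visible)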
+ # if hard match
+ if example_code_generated_output == example_output:
+ hard_match_counter += 1
+ hard_match_successes["example"].append(example)
+ hard_match_successes["all-generated-output"].append(generated_output+"@")
+ # elif soft match: the generation starts with the expected output but fails to stop there
+ elif example_code_generated_output[:len(example_output)] == example_output:
+ soft_match_counter += 1
+ soft_match_successes["example"].append(example)
+ soft_match_successes["all-generated-output"].append(generated_output+"@")
+ # else complete failure
+ else:
+ failures_counter += 1
+ failures["example"].append(example)
+ failures["all-generated-output"].append(generated_output+"@")
+
+ present = time.time()
+
+ log(f"|ITERS: {i+1} / {len(examples)} | COMP: {(i+1)/len(examples) * 100:.2f}% | RATE: {1/(present-past):.2f} it./s | SPD: {present - past :.4f} s/it.| ERT: {convert_seconds((len(examples)-i-1) * (present-past))} |", p_level = 1)
+ log(f"|hard-accuracy: {(hard_match_counter/(i+1))*100:.2f}% | soft-accuracy: {(soft_match_counter/(i+1))*100:.2f}% |", p_level = 2)
+
+ if (i+1) % checkpoint_interval == 0:
+
+ mode, header = ("w",True) if (i+1) == checkpoint_interval else ("a", False)
+
+ hard_match_successes_df = pd.DataFrame(hard_match_successes)
+ soft_match_successes_df = pd.DataFrame(soft_match_successes)
+ failures_df = pd.DataFrame(failures)
+
+ hard_match_successes_df.index = hard_match_successes_df.index + hard_match_base
+ soft_match_successes_df.index = soft_match_successes_df.index + soft_match_base
+ failures_df.index = failures_df.index + failures_base
+
+ hard_match_base = hard_match_counter
+ soft_match_base = soft_match_counter
+ failures_base = failures_counter
+
+ hard_match_successes_df.to_csv(DIR+"eval-1-atmpt-1-results/eval-1-atmpt-1-hard-match-successes.csv", mode = mode, header = header)
+ soft_match_successes_df.to_csv(DIR+"eval-1-atmpt-1-results/eval-1-atmpt-1-soft-match-successes.csv", mode = mode, header = header)
+ failures_df.to_csv(DIR+"eval-1-atmpt-1-results/eval-1-atmpt-1-failures.csv", mode = mode, header = header)
+
+ hard_match_successes = {"example":[], "all-generated-output":[]}
+ soft_match_successes = {"example":[], "all-generated-output":[]}
+ failures = {"example":[], "all-generated-output":[]}
+
+
+## Logging the metrics
+log("Logging the metrics")
+import neptune
+
+run = neptune.init_run(
+ project="younes-boukacem-workspace/tiny-lm-full-random-mode",
+ api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiJiZGFmNDg2Yy01MzRkLTQwNGMtYmZmMy1hYzM0Y2NkY2QyNmMifQ==",
+ with_id = "IMG1-72",
+ capture_hardware_metrics = False
+)
+run["eval-1/hard-accuracy-percentage"] = (hard_match_counter/len(examples))*100
+run["eval-1/soft-accuracy-percentage"] = (soft_match_counter/len(examples))*100
+
+log_file.close()
\ No newline at end of file
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/readme.md b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/readme.md
new file mode 100644
index 0000000..c58932c
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/readme.md
@@ -0,0 +1,2 @@
+# Description
+We train the 20M-parameter model from scratch on the hodhaifa ~200M dataset, loading the data fully into RAM for acceleration.
\ No newline at end of file
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/runid.txt b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/runid.txt
new file mode 100644
index 0000000..0af866a
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/runid.txt
@@ -0,0 +1 @@
+IMG1-73
\ No newline at end of file
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/train/model-training-atmpt-1-slurm-51481243.out b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/train/model-training-atmpt-1-slurm-51481243.out
new file mode 100644
index 0000000..7a17238
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/train/model-training-atmpt-1-slurm-51481243.out
@@ -0,0 +1,6 @@
+[neptune] [info ] Neptune initialized. Open in the app: https://app.neptune.ai/younes-boukacem-workspace/tiny-lm-full-random-mode/e/IMG1-72
+[neptune] [info ] Shutting down background jobs, please wait a moment...
+[neptune] [info ] Done!
+[neptune] [info ] Waiting for the remaining 4 operations to synchronize with Neptune. Do not kill this process.
+[neptune] [info ] All 4 operations synced, thanks for waiting!
+[neptune] [info ] Explore the metadata in the Neptune app: https://app.neptune.ai/younes-boukacem-workspace/tiny-lm-full-random-mode/e/IMG1-72/metadata
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/train/model-training-atmpt-1.log b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/train/model-training-atmpt-1.log
new file mode 100644
index 0000000..9fe94a2
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/train/model-training-atmpt-1.log
@@ -0,0 +1,19 @@
+
+
+
+Importing ...:
+ --> took (0, 0, 0, 4)
+
+Starting the neptune logging:
+ --> neptune init
+ --> saving the runid
+
+Set the random seed for reproducibility:
+
+Set the device to GPU if available, otherwise CPU:
+ --> device set to cuda.
+
+Setting arch-hyperparams for the GPT model:
+
+Loading the training and evaluation data:
+ --> train.bin
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/train/model-training-atmpt-1.py b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/train/model-training-atmpt-1.py
new file mode 100644
index 0000000..7c4bed1
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-4/train/model-training-atmpt-1.py
@@ -0,0 +1,415 @@
+# Model training
+
+## ATMPT-1 (on Greene)
+DIR = "/scratch/yb2618/tiny-lm-full-random-mode/ae-4/"
+DDIR = "/scratch/hb3020/training/60M-model-200M-data/training/"
+
+## Logging boilerplate
+log_file = open(DIR+"model-training-atmpt-1.log", "w")
+# progress bar reception string
+pbar_recept_string = " " * 200 + "\n"
+log_file.write(pbar_recept_string)
+log_file.write(pbar_recept_string)
+log_file.flush()
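+# log(): p_level=1 rewrites the first reserved line in place (outer progress bar) and
+# p_level=2 the second (inner progress bar); otherwise the message is appended, with
+# capitalized messages formatted as section headers and lowercase ones as sub-steps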
+def log(s:str, p_level=None):
+ if p_level == 1:
+ log_file.seek(0,0)
+ log_file.write(pbar_recept_string)
+ log_file.seek(0,0)
+ log_file.write(s)
+ log_file.seek(0,2)
+ elif p_level == 2:
+ log_file.seek(len(pbar_recept_string), 0)
+ log_file.write(pbar_recept_string)
+ log_file.seek(len(pbar_recept_string), 0)
+ log_file.write(s)
+ log_file.seek(0,2)
+ else:
+ if s[0].upper() == s[0]:
+ start = "\n"
+ end = ":"
+ else:
+ start = " --> "
+ end = ""
+ log_file.write(start + s + end + "\n")
+ log_file.flush()
+
+## Convert seconds to days, hours, minutes, seconds
+def convert_seconds(seconds:float):
+ # ignoring the sub seconds
+ seconds = int(seconds)
+ days, seconds = divmod(seconds, 86400)
+ hours, seconds = divmod(seconds, 3600)
+ minutes, seconds = divmod(seconds, 60)
+ return (days, hours, minutes, seconds)
+
+log("Importing ...")
+import time
+
+before = time.time()
+import random
+import datetime
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+after = time.time()
+log(f"took {convert_seconds(after - before)}")
+
+## Starting the neptune logging
+log("Starting the neptune logging")
+log("neptune init")
+import neptune
+run = neptune.init_run(
+ project="younes-boukacem-workspace/tiny-lm-full-random-mode",
+ api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiJiZGFmNDg2Yy01MzRkLTQwNGMtYmZmMy1hYzM0Y2NkY2QyNmMifQ==",
+ tags = ["20M-model", "~200M-code-snippets"],
+ description = "We train from scratch the 20M parameters model on hodhaifa ~ 200M dataset with the full loading of the data into ram for acceleration ",
+ capture_hardware_metrics = False,
+)
+# First attempt so we log the runid
+log("saving the runid")
+runid = run["sys/id"].fetch()
+with open(DIR + "runid.txt", "w") as f:
+ f.write(runid)
+
+
+## Set the random seed for reproducibility
+log("Set the random seed for reproducibility")
+seed = 42
+torch.manual_seed(seed)
+random.seed(seed)
+np.random.seed(seed)
+
+
+## Set the device to GPU if available, otherwise CPU
+log("Set the device to GPU if available, otherwise CPU")
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+log(f"device set to {device}.")
+
+
+## Setting arch-hyperparams for the GPT model
+log("Setting arch-hyperparams for the GPT model")
+run["arch-hyperparams/block_size"] = block_size = 256 # Maximum context length
+run["arch-hyperparams/n_embd"] = n_embd = 528 # Embedding dimension
+run["arch-hyperparams/n_head"] = n_head = 6 # Number of attention heads
+run["arch-hyperparams/n_layer"] = n_layer = 6 # Number of transformer blocks
+
+# Loading the training and evaluation data
+log("Loading the training and evaluation data")
+log("train.bin")
+before = time.time()
+train_data = np.memmap(DDIR+"train.bin", dtype = np.uint16, mode="r")
+train_data = np.array(train_data)
+after = time.time()
+log(f"took {convert_seconds(after - before)}")
+
+log("val.bin")
+before = time.time()
+val_data = np.memmap(DDIR+"val.bin", dtype = np.uint16, mode="r")
+val_data = np.array(val_data)
+after = time.time()
+log(f"took {convert_seconds(after - before)}")
+
+# Setting the train-hyperparams and util variables
+log("Setting train-hyperparams and util variables")
+run["train-hyperparams/batch_size"] = batch_size = 64 # Batch size for training
+run["train-hyperparams/dropout"] = dropout = 0 # Dropout rate
+run["train-hyperparms/max_pseudo_epochs"] = max_pseudo_epochs = 3
+run["train-hyperparams/learning_rate"] = learning_rate = 1e-3 # Initial Learning rate value
+run["train-hypeparams/max_degradations"] = max_degradations = 3 # number of consecutive degradations on val loss before stoping the training
+eval_interval = 5000 # Evaluation interval
+eval_iters = 500 # Number of iterations for evaluation
+max_iters = int( ( max_pseudo_epochs * len(train_data) ) / ( batch_size * block_size ) )
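+# each iteration consumes batch_size * block_size tokens, so max_iters above amounts to
+# max_pseudo_epochs passes over the token stream ("pseudo" because example boundaries are ignored)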
+log(f"max_iters = {max_iters}")
+miles = [int(max_iters * m) for m in [0.7, 0.8, 0.9]] # Milestones for learning rate decay as fractions of max_iters
+run["train-hyperparams/miles"] = str(miles)
+
+
+
+compile = False # requires PyTorch 2.0
+
+## Defining the model and utilities
+log("Defining the model and utilities")
+log("The model")
+class LayerNorm(nn.Module):
+ """ LayerNorm but with an optional bias. PyTorch doesn't support simply bias=False """
+
+ def __init__(self, ndim, bias):
+ super().__init__()
+ self.weight = nn.Parameter(torch.ones(ndim))
+ self.bias = nn.Parameter(torch.zeros(ndim)) if bias else None
+
+ def forward(self, input):
+ return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+
+class Head(nn.Module):
+ """One head of self-attention."""
+
+ def __init__(self, head_size):
+ super().__init__()
+ self.key = nn.Linear(n_embd, head_size, bias=False)
+ self.query = nn.Linear(n_embd, head_size, bias=False)
+ self.value = nn.Linear(n_embd, head_size, bias=False)
+ # probe for fused attention support; note the flag is never read, since
+ # scaled_dot_product_attention is called unconditionally below
+ self.flash = hasattr(torch.nn.functional, 'scaled_dot_product_attention')
+
+ self.dropout = nn.Dropout(dropout)
+
+ def forward(self, x):
+ B,T,C = x.shape # (batch, time, n_embd)
+ k = self.key(x) # (B, T, head_size)
+ q = self.query(x) # (B, T, head_size)
+ v = self.value(x) # (B, T, head_size)
+
+ out = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=None, dropout_p=dropout if self.training else 0, is_causal=True)
+
+ return out
+
+class MultiHeadAttention(nn.Module):
+ """multiple heads of self-attention in parallel."""
+
+ def __init__(self, num_heads, head_size):
+ super().__init__()
+ self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
+ self.proj = nn.Linear(n_embd, n_embd)
+ self.dropout = nn.Dropout(dropout)
+
+ def forward(self, x):
+ out = torch.cat([h(x) for h in self.heads], dim=-1)
+ out = self.dropout(self.proj(out))
+ return out
+
+class FeedForward(nn.Module):
+ """ a simple linear layer followed by a non-linearity."""
+
+ def __init__(self, n_embd):
+ super().__init__()
+ self.net = nn.Sequential(
+ nn.Linear(n_embd, 4 * n_embd, bias=False),
+ nn.GELU(),
+ nn.Linear( 4 * n_embd, n_embd, bias=False),
+ nn.Dropout(dropout),
+ )
+
+ def forward(self, x):
+ return self.net(x)
+
+class Block(nn.Module):
+ """ Transformer block: communication followed by feedforward."""
+
+ def __init__(self, n_embd, n_head):
+ super().__init__()
+ head_size = n_embd // n_head
+ self.sa = MultiHeadAttention(n_head, head_size)
+ self.ffwd = FeedForward(n_embd)
+ self.ln1 = nn.LayerNorm(n_embd, bias=False)
+ self.ln2 = nn.LayerNorm(n_embd, bias=False)
+
+ def forward(self, x):
+ x = x + self.sa(self.ln1(x))
+ x = x + self.ffwd(self.ln2(x))
+ return x
+
+class GPT(nn.Module):
+
+ def __init__(self):
+ super().__init__()
+ # token and position embeddings; the logits for the next token come from lm_head after the transformer blocks
+ self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
+ self.position_embedding_table = nn.Embedding(block_size, n_embd)
+ self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
+ self.ln_f = nn.LayerNorm(n_embd, bias=False)
+ self.lm_head = nn.Linear(n_embd, vocab_size)
+
+ def forward(self, idx, targets=None):
+ B, T = idx.shape
+
+ # idx and targets are both (B,T) tensors of integers
+ tok_emb = self.token_embedding_table(idx) # (B,T,C)
+ pos_emb = self.position_embedding_table(torch.arange(T, device=device)) # (T,C)
+ x = tok_emb + pos_emb # (B,T,C)
+ x = self.blocks(x) # (B,T,C)
+ x = self.ln_f(x) # (B,T,C)
+ logits = self.lm_head(x) # (B,T,vocab_size)
+
+ if targets is None:
+ loss = None
+ else:
+ B, T, C = logits.shape
+ logits = logits.view(B*T, C)
+ targets = targets.view(B*T)
+ loss = F.cross_entropy(logits, targets)
+
+ return logits, loss
+
+ @torch.no_grad()
+ def generate(self, idx, max_new_tokens):
+ # idx is (B, T) array of indices in the current context
+ for _ in range(max_new_tokens):
+ # crop idx to the last block_size tokens
+ idx_cond = idx[:, -block_size:] # (B, T)
+ # get the predictions
+ logits, loss = self(idx_cond)
+ # focus only on the last time step
+ logits = logits[:, -1, :] # becomes (B, C)
+ # apply softmax to get probabilities
+ probs = F.softmax(logits, dim=-1) # (B, C)
+ # greedy decoding: pick the most likely next token
+ # (multinomial sampling kept commented out as the stochastic alternative)
+ _, idx_next = torch.max(probs, dim=1, keepdim=True) # (B, 1)
+ #idx_next = torch.multinomial(probs, num_samples=1)
+ # append sampled index to the running sequence
+ idx = torch.cat((idx, idx_next), dim=1) # (B, T+1)
+ return idx
+
+# get random batch of data
+log("def get random batch of data")
+def get_batch(split):
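+ # sample batch_size random windows of block_size tokens; y is the same window shifted
+ # one token ahead, so each position's training target is the next token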
+ data = train_data if split == 'train' else val_data
+ ix = torch.randint(len(data) - block_size, (batch_size,))
+ x = torch.stack([torch.from_numpy((data[i:i+block_size]).astype(np.int64)) for i in ix])
+ y = torch.stack([torch.from_numpy((data[i+1:i+1+block_size]).astype(np.int64)) for i in ix])
+ x, y = x.to(device), y.to(device)
+ return x, y
+
+# estimate loss on train and val splits
+log("def estimate loss")
+@torch.no_grad()
+def estimate_loss():
+ out = {}
+ model.eval()
+ for split in ['train', 'val']:
+ losses = torch.zeros(eval_iters)
+ for k in range(eval_iters):
+ past = time.time()
+ X, Y = get_batch(split)
+ logits, loss = model(X, Y)
+ losses[k] = loss.item()
+ present = time.time()
+ log(f"{split}>|ITERS: {k+1} / {eval_iters} | COMP: {(k+1)/eval_iters * 100:.2f}% | RATE: {1/(present-past):.2f} it./s | SPD: {present - past :.4f} s/it.| ERT: {convert_seconds((eval_iters-k-1) * (present-past))} |", p_level = 2)
+ out[split] = losses.mean()
+ model.train()
+ return out
+
+# helper function to make large numbers of parameters human-readable
+log("def human readable")
+def human_readable(num):
+ magnitude = 0
+ while abs(num) >= 1000:
+ magnitude += 1
+ num /= 1000.0
+ return '%.0f%s' % (num, ['', 'K', 'M', 'G', 'T', 'P'][magnitude])
+
+
+## Loading the meta object
+log("Loading the meta object")
+log("loading")
+import pickle
+with open(DDIR+"meta.pkl", "rb") as f:
+ meta = pickle.load(f)
+log("setting vocab size")
+vocab_size = meta["vocab_size"]
+
+
+## Creating a new model
+log("Creating the model")
+model = GPT()
+model.to(device) # .to() moves an nn.Module's parameters in place
+num_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
+if compile:
+ print("compiling the model... (takes a ~minute)")
+ model = torch.compile(model)
+num_parameters_hr = human_readable(num_parameters)
+log(f'The model has {num_parameters_hr} trainable parameters')
+
+
+## Preparing for the training loop
+log("Preparing for the training loop")
+
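+# a small added safeguard (assuming the directory is not created elsewhere): make sure
+# the checkpoint directory used by torch.save below exists
+import os
+os.makedirs(DIR + "checkpoints", exist_ok=True)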
+# initializing the optimizer
+log("initialiazing the optimizer")
+optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
+
+# initializing the learning rate scheduler
+log("initializing the learing rate scheduler")
+scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=miles, gamma=0.1)
+
+
+# computing the initial loss
+log("computing the initial loss")
+losses = estimate_loss()
+
+# saving the last_losses for early stopping
+log("saving the last loss for early stopping")
+last_losses = losses
+best_val_loss = losses["val"]
+
+# neptune logging the initial loss
+log("neptune logging the initial loss")
+run["losses_trace/train_loss"].append(losses["train"], step = 0)
+run["losses_trace/val_loss"].append(losses["val"], step = 0)
+
+
+## Training loop
+log("Training loop")
+log("==========================================================================================")
+early_stopping = {"state": False, "iter": None, "epoch": None}
+now = datetime.datetime.now()
+date_hour = now.strftime("%Y-%m-%d_%H-%M")
+log(f'{date_hour} : iter {0:5d} <=> epoch 0 | train loss {losses["train"]:.4f} | val loss {losses["val"]:.4f}')
+nb_degradations = 0
+
+
+log("training ...")
+for iter in range(max_iters):
+ past = time.time()
+ # train the model for one iteration
+ xb, yb = get_batch('train')
+ # forward pass
+ logits, loss = model(xb, yb)
+ optimizer.zero_grad(set_to_none=True)
+ loss.backward()
+ optimizer.step()
+ # Step the scheduler
+ scheduler.step()
+
+ present = time.time()
+ log(f"|ITERS: {iter+1} / {max_iters} | COMP: {(iter+1)/max_iters * 100:.2f}% | RATE: {1/(present-past):.2f} it./s | SPD: {present - past :.4f} s/it.| ERT: {convert_seconds((max_iters-iter-1) * (present-past))}", p_level = 1)
+
+ # evaluate the model on the train and val splits and log the losses
+ if (iter+1) % eval_interval == 0:
+ log("checkpointing ...")
+ epoch = (block_size * batch_size * (iter+1))/len(train_data)
+ losses = estimate_loss()
+ now = datetime.datetime.now()
+ date_hour = now.strftime("%Y-%m-%d_%H-%M")
+ log(f'{date_hour} : iter {iter+1:10d} <=> epoch {epoch} | train loss {losses["train"]:.10f} | val loss {losses["val"]:.10f}')
+ if losses["val"] > last_losses["val"]:
+ nb_degradations += 1
+ if nb_degradations == max_degradations:
+ log(f"EARLY STOPPING at iter {iter+1} == epoch {epoch}")
+ early_stopping = {"state": True, "iter": iter+1, "epoch": epoch}
+ break
+ else:
+ nb_degradations = 0
+
+ # Logging the losses trace
+ run["losses_trace/train_loss"].append(losses["train"], step = epoch)
+ run["losses_trace/val_loss"].append(losses["val"], step = epoch)
+
+ # Saving the last_losses
+ last_losses = losses
+
+ # Saving the model
+ now = datetime.datetime.now()
+ date_hour = now.strftime("%Y-%m-%d_%H-%M")
+ torch.save(model.state_dict(), f"{DIR}checkpoints/checkpoint_iter{iter+1}_epoch{epoch:.2f}_{date_hour}.pth")
+ if losses["val"] < best_val_loss:
+ best_val_loss = losses["val"]
+ torch.save(model.state_dict(), f"{DIR}best-model.pth")
+ with open(f"{DIR}best-model.info", "w") as f:
+ f.write(f"iter : {iter+1}\n")
+ f.write(f"epoch : {epoch}\n")
+ f.write(f"date-hour : {date_hour}\n")
+ log("training ...")
+
+run["early_stopping"] = early_stopping
+log_file.close()
\ No newline at end of file
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/evals/eval-1/eval-1-atmpt-1-slurm-51645783.out b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/evals/eval-1/eval-1-atmpt-1-slurm-51645783.out
new file mode 100644
index 0000000..e0b3b12
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/evals/eval-1/eval-1-atmpt-1-slurm-51645783.out
@@ -0,0 +1,157 @@
+/scratch/yb2618/tiny-lm-full-random-mode/ae-5/eval-1-atmpt-1.py:226: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
+ model.load_state_dict(torch.load(DIR+"best-model.pth"))
+Device set to cuda.
+Traceback (most recent call last):
+ File "/scratch/yb2618/tiny-lm-full-random-mode/ae-5/eval-1-atmpt-1.py", line 226, in
+ model.load_state_dict(torch.load(DIR+"best-model.pth"))
+ File "/scratch/yb2618/envs/tinylm/lib/python3.11/site-packages/torch/nn/modules/module.py", line 2215, in load_state_dict
+ raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
+RuntimeError: Error(s) in loading state_dict for GPT:
+ size mismatch for token_embedding_table.weight: copying a param with shape torch.Size([54, 642]) from checkpoint, the shape in current model is torch.Size([54, 912]).
+ size mismatch for position_embedding_table.weight: copying a param with shape torch.Size([256, 642]) from checkpoint, the shape in current model is torch.Size([256, 912]).
+ size mismatch for blocks.0.sa.heads.0.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.0.sa.heads.0.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.0.sa.heads.0.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.0.sa.heads.1.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.0.sa.heads.1.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.0.sa.heads.1.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.0.sa.heads.2.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.0.sa.heads.2.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.0.sa.heads.2.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.0.sa.heads.3.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.0.sa.heads.3.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.0.sa.heads.3.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.0.sa.heads.4.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.0.sa.heads.4.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.0.sa.heads.4.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.0.sa.heads.5.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.0.sa.heads.5.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.0.sa.heads.5.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.0.sa.proj.weight: copying a param with shape torch.Size([642, 642]) from checkpoint, the shape in current model is torch.Size([912, 912]).
+ size mismatch for blocks.0.sa.proj.bias: copying a param with shape torch.Size([642]) from checkpoint, the shape in current model is torch.Size([912]).
+ size mismatch for blocks.0.ffwd.net.0.weight: copying a param with shape torch.Size([2568, 642]) from checkpoint, the shape in current model is torch.Size([3648, 912]).
+ size mismatch for blocks.0.ffwd.net.2.weight: copying a param with shape torch.Size([642, 2568]) from checkpoint, the shape in current model is torch.Size([912, 3648]).
+ size mismatch for blocks.0.ln1.weight: copying a param with shape torch.Size([642]) from checkpoint, the shape in current model is torch.Size([912]).
+ size mismatch for blocks.0.ln2.weight: copying a param with shape torch.Size([642]) from checkpoint, the shape in current model is torch.Size([912]).
+ size mismatch for blocks.1.sa.heads.0.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.1.sa.heads.0.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.1.sa.heads.0.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.1.sa.heads.1.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.1.sa.heads.1.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.1.sa.heads.1.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.1.sa.heads.2.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.1.sa.heads.2.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.1.sa.heads.2.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.1.sa.heads.3.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.1.sa.heads.3.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.1.sa.heads.3.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.1.sa.heads.4.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.1.sa.heads.4.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.1.sa.heads.4.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.1.sa.heads.5.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.1.sa.heads.5.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.1.sa.heads.5.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.1.sa.proj.weight: copying a param with shape torch.Size([642, 642]) from checkpoint, the shape in current model is torch.Size([912, 912]).
+ size mismatch for blocks.1.sa.proj.bias: copying a param with shape torch.Size([642]) from checkpoint, the shape in current model is torch.Size([912]).
+ size mismatch for blocks.1.ffwd.net.0.weight: copying a param with shape torch.Size([2568, 642]) from checkpoint, the shape in current model is torch.Size([3648, 912]).
+ size mismatch for blocks.1.ffwd.net.2.weight: copying a param with shape torch.Size([642, 2568]) from checkpoint, the shape in current model is torch.Size([912, 3648]).
+ size mismatch for blocks.1.ln1.weight: copying a param with shape torch.Size([642]) from checkpoint, the shape in current model is torch.Size([912]).
+ size mismatch for blocks.1.ln2.weight: copying a param with shape torch.Size([642]) from checkpoint, the shape in current model is torch.Size([912]).
+ size mismatch for blocks.2.sa.heads.0.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.2.sa.heads.0.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.2.sa.heads.0.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.2.sa.heads.1.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.2.sa.heads.1.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.2.sa.heads.1.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.2.sa.heads.2.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.2.sa.heads.2.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.2.sa.heads.2.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.2.sa.heads.3.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.2.sa.heads.3.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.2.sa.heads.3.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.2.sa.heads.4.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.2.sa.heads.4.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.2.sa.heads.4.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.2.sa.heads.5.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.2.sa.heads.5.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.2.sa.heads.5.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.2.sa.proj.weight: copying a param with shape torch.Size([642, 642]) from checkpoint, the shape in current model is torch.Size([912, 912]).
+ size mismatch for blocks.2.sa.proj.bias: copying a param with shape torch.Size([642]) from checkpoint, the shape in current model is torch.Size([912]).
+ size mismatch for blocks.2.ffwd.net.0.weight: copying a param with shape torch.Size([2568, 642]) from checkpoint, the shape in current model is torch.Size([3648, 912]).
+ size mismatch for blocks.2.ffwd.net.2.weight: copying a param with shape torch.Size([642, 2568]) from checkpoint, the shape in current model is torch.Size([912, 3648]).
+ size mismatch for blocks.2.ln1.weight: copying a param with shape torch.Size([642]) from checkpoint, the shape in current model is torch.Size([912]).
+ size mismatch for blocks.2.ln2.weight: copying a param with shape torch.Size([642]) from checkpoint, the shape in current model is torch.Size([912]).
+ size mismatch for blocks.3.sa.heads.0.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.3.sa.heads.0.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.3.sa.heads.0.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.3.sa.heads.1.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.3.sa.heads.1.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.3.sa.heads.1.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.3.sa.heads.2.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.3.sa.heads.2.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.3.sa.heads.2.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.3.sa.heads.3.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.3.sa.heads.3.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.3.sa.heads.3.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.3.sa.heads.4.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.3.sa.heads.4.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.3.sa.heads.4.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.3.sa.heads.5.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.3.sa.heads.5.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.3.sa.heads.5.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.3.sa.proj.weight: copying a param with shape torch.Size([642, 642]) from checkpoint, the shape in current model is torch.Size([912, 912]).
+ size mismatch for blocks.3.sa.proj.bias: copying a param with shape torch.Size([642]) from checkpoint, the shape in current model is torch.Size([912]).
+ size mismatch for blocks.3.ffwd.net.0.weight: copying a param with shape torch.Size([2568, 642]) from checkpoint, the shape in current model is torch.Size([3648, 912]).
+ size mismatch for blocks.3.ffwd.net.2.weight: copying a param with shape torch.Size([642, 2568]) from checkpoint, the shape in current model is torch.Size([912, 3648]).
+ size mismatch for blocks.3.ln1.weight: copying a param with shape torch.Size([642]) from checkpoint, the shape in current model is torch.Size([912]).
+ size mismatch for blocks.3.ln2.weight: copying a param with shape torch.Size([642]) from checkpoint, the shape in current model is torch.Size([912]).
+ size mismatch for blocks.4.sa.heads.0.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.4.sa.heads.0.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.4.sa.heads.0.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.4.sa.heads.1.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.4.sa.heads.1.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.4.sa.heads.1.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.4.sa.heads.2.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.4.sa.heads.2.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.4.sa.heads.2.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.4.sa.heads.3.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.4.sa.heads.3.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.4.sa.heads.3.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.4.sa.heads.4.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.4.sa.heads.4.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.4.sa.heads.4.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.4.sa.heads.5.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.4.sa.heads.5.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.4.sa.heads.5.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.4.sa.proj.weight: copying a param with shape torch.Size([642, 642]) from checkpoint, the shape in current model is torch.Size([912, 912]).
+ size mismatch for blocks.4.sa.proj.bias: copying a param with shape torch.Size([642]) from checkpoint, the shape in current model is torch.Size([912]).
+ size mismatch for blocks.4.ffwd.net.0.weight: copying a param with shape torch.Size([2568, 642]) from checkpoint, the shape in current model is torch.Size([3648, 912]).
+ size mismatch for blocks.4.ffwd.net.2.weight: copying a param with shape torch.Size([642, 2568]) from checkpoint, the shape in current model is torch.Size([912, 3648]).
+ size mismatch for blocks.4.ln1.weight: copying a param with shape torch.Size([642]) from checkpoint, the shape in current model is torch.Size([912]).
+ size mismatch for blocks.4.ln2.weight: copying a param with shape torch.Size([642]) from checkpoint, the shape in current model is torch.Size([912]).
+ size mismatch for blocks.5.sa.heads.0.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.5.sa.heads.0.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.5.sa.heads.0.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.5.sa.heads.1.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.5.sa.heads.1.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.5.sa.heads.1.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.5.sa.heads.2.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.5.sa.heads.2.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.5.sa.heads.2.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.5.sa.heads.3.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.5.sa.heads.3.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.5.sa.heads.3.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.5.sa.heads.4.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.5.sa.heads.4.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.5.sa.heads.4.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.5.sa.heads.5.key.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.5.sa.heads.5.query.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.5.sa.heads.5.value.weight: copying a param with shape torch.Size([107, 642]) from checkpoint, the shape in current model is torch.Size([152, 912]).
+ size mismatch for blocks.5.sa.proj.weight: copying a param with shape torch.Size([642, 642]) from checkpoint, the shape in current model is torch.Size([912, 912]).
+ size mismatch for blocks.5.sa.proj.bias: copying a param with shape torch.Size([642]) from checkpoint, the shape in current model is torch.Size([912]).
+ size mismatch for blocks.5.ffwd.net.0.weight: copying a param with shape torch.Size([2568, 642]) from checkpoint, the shape in current model is torch.Size([3648, 912]).
+ size mismatch for blocks.5.ffwd.net.2.weight: copying a param with shape torch.Size([642, 2568]) from checkpoint, the shape in current model is torch.Size([912, 3648]).
+ size mismatch for blocks.5.ln1.weight: copying a param with shape torch.Size([642]) from checkpoint, the shape in current model is torch.Size([912]).
+ size mismatch for blocks.5.ln2.weight: copying a param with shape torch.Size([642]) from checkpoint, the shape in current model is torch.Size([912]).
+ size mismatch for ln_f.weight: copying a param with shape torch.Size([642]) from checkpoint, the shape in current model is torch.Size([912]).
+ size mismatch for lm_head.weight: copying a param with shape torch.Size([54, 642]) from checkpoint, the shape in current model is torch.Size([54, 912]).
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/evals/eval-1/eval-1-atmpt-1-slurm-51645798.out b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/evals/eval-1/eval-1-atmpt-1-slurm-51645798.out
new file mode 100644
index 0000000..fc123ba
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/evals/eval-1/eval-1-atmpt-1-slurm-51645798.out
@@ -0,0 +1,3 @@
+/scratch/yb2618/tiny-lm-full-random-mode/ae-5/eval-1-atmpt-1.py:226: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
+ model.load_state_dict(torch.load(DIR+"best-model.pth"))
+slurmstepd: error: *** JOB 51645798 ON gr029 CANCELLED AT 2024-10-04T09:04:19 ***
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/evals/eval-1/eval-1-atmpt-1.log b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/evals/eval-1/eval-1-atmpt-1.log
new file mode 100644
index 0000000..d10be86
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/evals/eval-1/eval-1-atmpt-1.log
@@ -0,0 +1,20 @@
+|ITERS: 174065 / 20000000 | COMP: 0.87% | RATE: 1.57 it./s | SPD: 0.6351 s/it.| ERT: (145, 17, 30, 33) |
+|hard-accuracy: 37.82% | soft-accuracy: 0.98% |
+
+Imports:
+
+Set the device to GPU if available, otherwise CPU:
+
+Loading the encode and decode functions and setting the vocab_size from the meta object:
+
+Redefining the model of the training:
+
+Loading the model:
+
+Creating and loading the model:
+
+Reading the test data:
+
+Splitting the test data into examples:
+
+Sequential Evaluation loop:
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/evals/eval-1/eval-1-atmpt-1.py b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/evals/eval-1/eval-1-atmpt-1.py
new file mode 100644
index 0000000..50a2e55
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/evals/eval-1/eval-1-atmpt-1.py
@@ -0,0 +1,336 @@
+#@ EVAL-1-ATMPT-1
+#@ We evaluate the 20M model on hodhaifa generated test.txt for the 200M code snippets dataset
+
+## On Greene
+DIR = "/scratch/yb2618/tiny-lm-full-random-mode/ae-5/"
+DDIR = "/scratch/hb3020/training/60M-model-200M-data/training/"
+DDIR2 = "/scratch/hb3020/training/60M-model-200M-data/"
+
+## Logging boilerplate
+log_file = open(DIR+"eval-1-atmpt-1.log","w")
+pbar_recept_string = " " * 200 + "\n"
+log_file.write(pbar_recept_string)
+log_file.write(pbar_recept_string)
+log_file.flush()
+def log(s:str, p_level=None):
+ if p_level == 1:
+ log_file.seek(0,0)
+ log_file.write(pbar_recept_string)
+ log_file.seek(0,0)
+ log_file.write(s)
+ log_file.seek(0,2)
+ elif p_level == 2:
+ log_file.seek(len(pbar_recept_string), 0)
+ log_file.write(pbar_recept_string)
+ log_file.seek(len(pbar_recept_string), 0)
+ log_file.write(s)
+ log_file.seek(0,2)
+ else:
+ if s[0].upper() == s[0]:
+ start = "\n"
+ end = ":"
+ else:
+ start = " --> "
+ end = ""
+ log_file.write(start + s + end + "\n")
+ log_file.flush()
+
+
+## Convert seconds to days, hours, minutes, seconds
+def convert_seconds(seconds:float):
+ # ignoring the sub seconds
+ seconds = int(seconds)
+ days, seconds = divmod(seconds, 86400)
+ hours, seconds = divmod(seconds, 3600)
+ minutes, seconds = divmod(seconds, 60)
+ return (days, hours, minutes, seconds)
+
+
+## Imports
+log("Imports")
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+## Set the device to GPU if available, otherwise CPU
+log("Set the device to GPU if available, otherwise CPU")
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+print(f"Device set to {device}.")
+
+
+## Loading the encode and decode functions and setting the vocab_size from the meta object
+log("Loading the encode and decode functions and setting the vocab_size from the meta object")
+import pickle
+
+with open(DDIR+"meta.pkl", "rb") as f:
+ meta = pickle.load(f)
+stoi = meta["stoi"]
+itos = meta["itos"]
+
+def encode(s):
+ return [stoi[c] for c in s] # encoder: take a string, output a list of integers
+
+def decode(l):
+ return ''.join([itos[i] for i in l]) # decoder: take a list of integers, output a string
+
+vocab_size = meta["vocab_size"]
+
+
+## Redefining the model of the training
+log("Redefining the model of the training")
+block_size = 256 # Maximum context length
+n_embd = 642 # Embedding dimension
+n_head = 6 # Number of attention heads
+n_layer = 6 # Number of transformer blocks
+dropout = 0 # Dropout rate
+batch_size = 64 # Batch size for training
+
+
+## Loading the model
+log("Loading the model")
+class LayerNorm(nn.Module):
+ """ LayerNorm but with an optional bias. PyTorch doesn't support simply bias=False """
+
+ def __init__(self, ndim, bias):
+ super().__init__()
+ self.weight = nn.Parameter(torch.ones(ndim))
+ self.bias = nn.Parameter(torch.zeros(ndim)) if bias else None
+
+ def forward(self, input):
+ return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+
+class Head(nn.Module):
+ """One head of self-attention."""
+
+ def __init__(self, head_size):
+ super().__init__()
+ self.key = nn.Linear(n_embd, head_size, bias=False)
+ self.query = nn.Linear(n_embd, head_size, bias=False)
+ self.value = nn.Linear(n_embd, head_size, bias=False)
+ # probe for fused attention support; note the flag is never read, since
+ # scaled_dot_product_attention is called unconditionally below
+ self.flash = hasattr(torch.nn.functional, 'scaled_dot_product_attention')
+
+ self.dropout = nn.Dropout(dropout)
+
+ def forward(self, x):
+ B,T,C = x.shape # (batch, time, n_embd)
+ k = self.key(x) # (B, T, head_size)
+ q = self.query(x) # (B, T, head_size)
+ v = self.value(x) # (B, T, head_size)
+
+ out = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=None, dropout_p=dropout if self.training else 0, is_causal=True)
+
+ return out
+
+class MultiHeadAttention(nn.Module):
+ """multiple heads of self-attention in parallel."""
+
+ def __init__(self, num_heads, head_size):
+ super().__init__()
+ self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
+ self.proj = nn.Linear(n_embd, n_embd)
+ self.dropout = nn.Dropout(dropout)
+
+ def forward(self, x):
+ out = torch.cat([h(x) for h in self.heads], dim=-1)
+ out = self.dropout(self.proj(out))
+ return out
+
+class FeedForward(nn.Module):
+ """ a simple linear layer followed by a non-linearity."""
+
+ def __init__(self, n_embd):
+ super().__init__()
+ self.net = nn.Sequential(
+ nn.Linear(n_embd, 4 * n_embd, bias=False),
+ nn.GELU(),
+ nn.Linear( 4 * n_embd, n_embd, bias=False),
+ nn.Dropout(dropout),
+ )
+
+ def forward(self, x):
+ return self.net(x)
+
+class Block(nn.Module):
+ """ Transformer block: communication followed by feedforward."""
+
+ def __init__(self, n_embd, n_head):
+ super().__init__()
+ head_size = n_embd // n_head
+ self.sa = MultiHeadAttention(n_head, head_size)
+ self.ffwd = FeedForward(n_embd)
+ self.ln1 = nn.LayerNorm(n_embd, bias=False)
+ self.ln2 = nn.LayerNorm(n_embd, bias=False)
+
+ def forward(self, x):
+ x = x + self.sa(self.ln1(x))
+ x = x + self.ffwd(self.ln2(x))
+ return x
+
+class GPT(nn.Module):
+
+ def __init__(self):
+ super().__init__()
+ # each token directly reads off the logits for the next token from a lookup table
+ self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
+ self.position_embedding_table = nn.Embedding(block_size, n_embd)
+ self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
+ self.ln_f = nn.LayerNorm(n_embd, bias=False)
+ self.lm_head = nn.Linear(n_embd, vocab_size)
+
+ def forward(self, idx, targets=None):
+ B, T = idx.shape
+
+ # idx and targets are both (B,T) tensor of integers
+ tok_emb = self.token_embedding_table(idx) # (B,T,C)
+ pos_emb = self.position_embedding_table(torch.arange(T, device=device)) # (T,C)
+ x = tok_emb + pos_emb # (B,T,C)
+ x = self.blocks(x) # (B,T,C)
+ x = self.ln_f(x) # (B,T,C)
+ logits = self.lm_head(x) # (B,T,vocab_size)
+
+ if targets is None:
+ loss = None
+ else:
+ B, T, C = logits.shape
+ logits = logits.view(B*T, C)
+ targets = targets.view(B*T)
+ loss = F.cross_entropy(logits, targets)
+
+ return logits, loss
+
+ @torch.no_grad()
+ def generate(self, idx, max_new_tokens):
+ # idx is (B, T) array of indices in the current context
+ for _ in range(max_new_tokens):
+ # crop idx to the last block_size tokens
+ idx_cond = idx[:, -block_size:] # (B, T)
+ # get the predictions
+ logits, loss = self(idx_cond)
+ # focus only on the last time step
+ logits = logits[:, -1, :] # becomes (B, C)
+ # apply softmax to get probabilities
+ probs = F.softmax(logits, dim=-1) # (B, C)
+            # greedy decoding: take the most likely next token
+            _, idx_next = torch.max(probs, dim=1, keepdim=True) # (B, 1)
+            #idx_next = torch.multinomial(probs, num_samples=1) # alternative: sample from the distribution
+ # append sampled index to the running sequence
+ idx = torch.cat((idx, idx_next), dim=1) # (B, T+1)
+ return idx
+
+
+## Creating and loading the model
+log("Creating and loading the model")
+model = GPT()
+model.eval()
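+# note: torch.load defaults to weights_only=False here; passing weights_only=True
+# would avoid the pickle FutureWarning captured in the slurm logs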
+model.load_state_dict(torch.load(DIR+"best-model.pth"))
+model.to(device)
+
+
+## Reading the test data
+log("Reading the test data")
+with open(DDIR2+"test.txt", "r") as f:
+ test_data = f.read()
+
+
+## Splitting the test data into examples
+log("Splitting the test data into examples")
+examples = test_data.split("\n\n")
+examples = examples[:] # no-op slice (presumably kept as a hook for subsetting during debugging)
+
+
+## Sequential Evaluation loop
+log("Sequential Evaluation loop")
+hard_match_counter = 0
+soft_match_counter = 0
+failures_counter = 0
+
+hard_match_successes = {"example":[], "all-generated-output":[]} # correct generated output + correct stopping (no hallucination) i.e. fully correct
+soft_match_successes = {"example":[], "all-generated-output":[]} # correct initial generated output BUT uncorrect stopping (hallucination)
+failures = {"example":[], "all-generated-output":[]} # completely uncorrect answer
+
+import time
+import pandas as pd
+import os
+
+os.makedirs(os.path.dirname(DIR+"eval-1-atmpt-1-results/"), exist_ok = True)
+
+checkpoint_interval = 5000
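+# per-interval results are flushed to CSV below; this assumes len(examples) is a
+# multiple of checkpoint_interval, otherwise the tail collected after the last
+# full interval would need one final flush after the loop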
+
+hard_match_base = 0
+soft_match_base = 0
+failures_base = 0
+
+for i, example in enumerate(examples):
+
+ past = time.time()
+
+ # generating the output of the model
+ example_match = example.split("# output\n")
+ example_prompt = example_match[0] + "# output\n"
+ example_output = example_match[1]
+ example_prompt_tensor = torch.tensor(encode(example_prompt), dtype=torch.long).unsqueeze(0).to(device)
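+    # generate len(example_output) + 20 characters so that over-generation
+    # (hallucination past the expected output) can be detected below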
+ generated_example = decode(model.generate(example_prompt_tensor, max_new_tokens = len(example_output) + 20)[0].tolist())
+
+ generated_output = generated_example[len(example_prompt):]
+ example_code_generated_output = generated_output.split("\n\n")[0]
+ # if hard match
+ if example_code_generated_output == example_output:
+ hard_match_counter += 1
+ hard_match_successes["example"].append(example)
+ hard_match_successes["all-generated-output"].append(generated_output+"@")
+ # elif soft checking
+ elif example_code_generated_output[:len(example_output)] == example_output:
+ soft_match_counter += 1
+ soft_match_successes["example"].append(example)
+ soft_match_successes["all-generated-output"].append(generated_output+"@")
+ # else complete failure
+ else:
+ failures_counter += 1
+ failures["example"].append(example)
+ failures["all-generated-output"].append(generated_output+"@")
+
+ present = time.time()
+
+ log(f"|ITERS: {i+1} / {len(examples)} | COMP: {(i+1)/len(examples) * 100:.2f}% | RATE: {1/(present-past):.2f} it./s | SPD: {present - past :.4f} s/it.| ERT: {convert_seconds((len(examples)-i-1) * (present-past))} |", p_level = 1)
+ log(f"|hard-accuracy: {(hard_match_counter/(i+1))*100:.2f}% | soft-accuracy: {(soft_match_counter/(i+1))*100:.2f}% |", p_level = 2)
+
+ if (i+1) % checkpoint_interval == 0:
+
+ mode, header = ("w",True) if (i+1) == checkpoint_interval else ("a", False)
+
+ hard_match_successes_df = pd.DataFrame(hard_match_successes)
+ soft_match_successes_df = pd.DataFrame(soft_match_successes)
+ failures_df = pd.DataFrame(failures)
+
+ hard_match_successes_df.index = hard_match_successes_df.index + hard_match_base
+ soft_match_successes_df.index = soft_match_successes_df.index + soft_match_base
+ failures_df.index = failures_df.index + failures_base
+
+ hard_match_base = hard_match_counter
+ soft_match_base = soft_match_counter
+ failures_base = failures_counter
+
+ hard_match_successes_df.to_csv(DIR+"eval-1-atmpt-1-results/eval-1-atmpt-1-hard-match-successes.csv", mode = mode, header = header)
+ soft_match_successes_df.to_csv(DIR+"eval-1-atmpt-1-results/eval-1-atmpt-1-soft-match-successes.csv", mode = mode, header = header)
+ failures_df.to_csv(DIR+"eval-1-atmpt-1-results/eval-1-atmpt-1-failures.csv", mode = mode, header = header)
+
+ hard_match_successes = {"example":[], "all-generated-output":[]}
+ soft_match_successes = {"example":[], "all-generated-output":[]}
+ failures = {"example":[], "all-generated-output":[]}
+
+
+## Logging the metrics
+log("Logging the metrics")
+import neptune
+
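+# reattach to the existing Neptune training run ("IMG1-72") to record the eval metrics on it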
+run = neptune.init_run(
+ project="younes-boukacem-workspace/tiny-lm-full-random-mode",
+ api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiJiZGFmNDg2Yy01MzRkLTQwNGMtYmZmMy1hYzM0Y2NkY2QyNmMifQ==",
+ with_id = "IMG1-72",
+ capture_hardware_metrics = False
+)
+run["eval-1/hard-accuracy-percentage"] = (hard_match_counter/len(examples))*100
+run["eval-1/soft-accuracy-percentage"] = (soft_match_counter/len(examples))*100
+
+log_file.close()
\ No newline at end of file
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/evals/eval-2/eval-2-atmpt-1-slurm-51765143.out b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/evals/eval-2/eval-2-atmpt-1-slurm-51765143.out
new file mode 100644
index 0000000..d270e70
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/evals/eval-2/eval-2-atmpt-1-slurm-51765143.out
@@ -0,0 +1,3 @@
+/scratch/yb2618/tiny-lm-full-random-mode/ae-5/eval-2-atmpt-1.py:226: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
+ model.load_state_dict(torch.load(DIR+"best-model.pth"))
+slurmstepd: error: *** JOB 51765143 ON gr004 CANCELLED AT 2024-10-05T08:51:37 ***
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/evals/eval-2/eval-2-atmpt-1.log b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/evals/eval-2/eval-2-atmpt-1.log
new file mode 100644
index 0000000..c48cdea
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/evals/eval-2/eval-2-atmpt-1.log
@@ -0,0 +1,20 @@
+|ITERS: 26968 / 20000000 | COMP: 0.13% | RATE: 1.52 it./s | SPD: 0.6582 s/it.| ERT: (152, 3, 35, 7) |
+|hard-accuracy: 43.57% | soft-accuracy: 0.52% |
+
+Imports:
+
+Set the device to GPU if available, otherwise CPU:
+
+Loading the encode and decode functions and setting the vocab_size from the meta object:
+
+Redefining the model used during training:
+
+Loading the model:
+
+Creating and loading the model:
+
+Reading the test data:
+
+Splitting the test data into examples:
+
+Sequential Evaluation loop:
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/evals/eval-2/eval-2-atmpt-1.py b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/evals/eval-2/eval-2-atmpt-1.py
new file mode 100644
index 0000000..226da4d
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/evals/eval-2/eval-2-atmpt-1.py
@@ -0,0 +1,336 @@
+#@ EVAL-2-ATMPT-1
+#@ We evaluate the 30M model on hodhaifa's generated test.txt for the ~200M code snippets dataset
+
+## On Greene
+DIR = "/scratch/yb2618/tiny-lm-full-random-mode/ae-5/"
+DDIR = "/scratch/hb3020/training/60M-model-200M-data/training/"
+DDIR2 = "/scratch/hb3020/training/60M-model-200M-data/"
+
+## Logging boilerplate
+log_file = open(DIR+"eval-2-atmpt-1.log","w")
+pbar_recept_string = " " * 200 + "\n"
+log_file.write(pbar_recept_string)
+log_file.write(pbar_recept_string)
+log_file.flush()
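+# the first two lines of the log file are reserved as in-place progress bars:
+# p_level=1 rewrites the first line, p_level=2 the second, and any other call
+# appends to the end of the file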
+def log(s:str, p_level=None):
+ if p_level == 1:
+ log_file.seek(0,0)
+ log_file.write(pbar_recept_string)
+ log_file.seek(0,0)
+ log_file.write(s)
+ log_file.seek(0,2)
+ elif p_level == 2:
+ log_file.seek(len(pbar_recept_string), 0)
+ log_file.write(pbar_recept_string)
+ log_file.seek(len(pbar_recept_string), 0)
+ log_file.write(s)
+ log_file.seek(0,2)
+ else:
+ if s[0].upper() == s[0]:
+ start = "\n"
+ end = ":"
+ else:
+ start = " --> "
+ end = ""
+ log_file.write(start + s + end + "\n")
+ log_file.flush()
+
+
+## Convert seconds to days, hours, minutes, seconds
+def convert_seconds(seconds:float):
+ # ignoring the sub seconds
+ seconds = int(seconds)
+ days, seconds = divmod(seconds, 86400)
+ hours, seconds = divmod(seconds, 3600)
+ minutes, seconds = divmod(seconds, 60)
+ return (days, hours, minutes, seconds)
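+# e.g. convert_seconds(90061) == (1, 1, 1, 1), i.e. 1 day, 1 hour, 1 minute, 1 second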
+
+
+## Imports
+log("Imports")
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+## Set the device to GPU if available, otherwise CPU
+log("Set the device to GPU if available, otherwise CPU")
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+print(f"Device set to {device}.")
+
+
+## Loading the encode and decode functions and setting the vocab_size from the meta object
+log("Loading the encode and decode functions and setting the vocab_size from the meta object")
+import pickle
+
+with open(DDIR+"meta.pkl", "rb") as f:
+ meta = pickle.load(f)
+stoi = meta["stoi"]
+itos = meta["itos"]
+
+def encode(s):
+ return [stoi[c] for c in s] # encoder: take a string, output a list of integers
+
+def decode(l):
+ return ''.join([itos[i] for i in l]) # decoder: take a list of integers, output a string
+
+vocab_size = meta["vocab_size"]
+
+
+## Redefining the model used during training
+log("Redefining the model used during training")
+block_size = 256 # Maximum context length
+n_embd = 642 # Embedding dimension
+n_head = 6 # Number of attention heads
+n_layer = 6 # Number of transformer blocks
+dropout = 0 # Dropout rate
+batch_size = 64 # Batch size for training
+
+
+## Loading the model
+log("Loading the model")
+class LayerNorm(nn.Module):
+ """ LayerNorm but with an optional bias. PyTorch doesn't support simply bias=False """
+
+ def __init__(self, ndim, bias):
+ super().__init__()
+ self.weight = nn.Parameter(torch.ones(ndim))
+ self.bias = nn.Parameter(torch.zeros(ndim)) if bias else None
+
+ def forward(self, input):
+ return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+
+class Head(nn.Module):
+ """One head of self-attention."""
+
+ def __init__(self, head_size):
+ super().__init__()
+ self.key = nn.Linear(n_embd, head_size, bias=False)
+ self.query = nn.Linear(n_embd, head_size, bias=False)
+ self.value = nn.Linear(n_embd, head_size, bias=False)
+ self.flash = hasattr(torch.nn.functional, 'scaled_dot_product_attention')
+
+ self.dropout = nn.Dropout(dropout)
+
+ def forward(self, x):
+ B,T,C = x.shape
+        k = self.key(x)   # (B, T, head_size)
+        q = self.query(x) # (B, T, head_size)
+ v = self.value(x)
+
+ out = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=None, dropout_p=dropout if self.training else 0, is_causal=True)
+
+ return out
+
+class MultiHeadAttention(nn.Module):
+ """multiple heads of self-attention in parallel."""
+
+ def __init__(self, num_heads, head_size):
+ super().__init__()
+ self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
+ self.proj = nn.Linear(n_embd, n_embd)
+ self.dropout = nn.Dropout(dropout)
+
+ def forward(self, x):
+ out = torch.cat([h(x) for h in self.heads], dim=-1)
+ out = self.dropout(self.proj(out))
+ return out
+
+class FeedForward(nn.Module):
+ """ a simple linear layer followed by a non-linearity."""
+
+ def __init__(self, n_embd):
+ super().__init__()
+ self.net = nn.Sequential(
+ nn.Linear(n_embd, 4 * n_embd, bias=False),
+ nn.GELU(),
+ nn.Linear( 4 * n_embd, n_embd, bias=False),
+ nn.Dropout(dropout),
+ )
+
+ def forward(self, x):
+ return self.net(x)
+
+class Block(nn.Module):
+ """ Transformer block: communication followed by feedforward."""
+
+ def __init__(self, n_embd, n_head):
+ super().__init__()
+ head_size = n_embd // n_head
+ self.sa = MultiHeadAttention(n_head, head_size)
+ self.ffwd = FeedForward(n_embd)
+ self.ln1 = nn.LayerNorm(n_embd, bias=False)
+ self.ln2 = nn.LayerNorm(n_embd, bias=False)
+
+ def forward(self, x):
+ x = x + self.sa(self.ln1(x))
+ x = x + self.ffwd(self.ln2(x))
+ return x
+
+class GPT(nn.Module):
+
+ def __init__(self):
+ super().__init__()
+ # each token directly reads off the logits for the next token from a lookup table
+ self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
+ self.position_embedding_table = nn.Embedding(block_size, n_embd)
+ self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
+ self.ln_f = nn.LayerNorm(n_embd, bias=False)
+ self.lm_head = nn.Linear(n_embd, vocab_size)
+
+ def forward(self, idx, targets=None):
+ B, T = idx.shape
+
+ # idx and targets are both (B,T) tensor of integers
+ tok_emb = self.token_embedding_table(idx) # (B,T,C)
+ pos_emb = self.position_embedding_table(torch.arange(T, device=device)) # (T,C)
+ x = tok_emb + pos_emb # (B,T,C)
+ x = self.blocks(x) # (B,T,C)
+ x = self.ln_f(x) # (B,T,C)
+ logits = self.lm_head(x) # (B,T,vocab_size)
+
+ if targets is None:
+ loss = None
+ else:
+ B, T, C = logits.shape
+ logits = logits.view(B*T, C)
+ targets = targets.view(B*T)
+ loss = F.cross_entropy(logits, targets)
+
+ return logits, loss
+
+ @torch.no_grad()
+ def generate(self, idx, max_new_tokens):
+ # idx is (B, T) array of indices in the current context
+ for _ in range(max_new_tokens):
+ # crop idx to the last block_size tokens
+ idx_cond = idx[:, -block_size:] # (B, T)
+ # get the predictions
+ logits, loss = self(idx_cond)
+ # focus only on the last time step
+ logits = logits[:, -1, :] # becomes (B, C)
+ # apply softmax to get probabilities
+ probs = F.softmax(logits, dim=-1) # (B, C)
+            # greedy decoding: take the most likely next token
+            _, idx_next = torch.max(probs, dim=1, keepdim=True) # (B, 1)
+            #idx_next = torch.multinomial(probs, num_samples=1) # alternative: sample from the distribution
+ # append sampled index to the running sequence
+ idx = torch.cat((idx, idx_next), dim=1) # (B, T+1)
+ return idx
+
+
+## Creating and loading the model
+log("Creating and loading the model")
+model = GPT()
+model.eval()
+model.load_state_dict(torch.load(DIR+"best-model.pth"))
+model.to(device)
+
+
+## Reading the test data
+log("Reading the test data")
+with open(DDIR2+"test.txt", "r") as f:
+ test_data = f.read()
+
+
+## Splitting the test data into examples
+log("Splitting the test data into examples")
+examples = test_data.split("\n\n")
+examples = examples[:]
+
+
+## Sequential Evaluation loop
+log("Sequential Evaluation loop")
+hard_match_counter = 0
+soft_match_counter = 0
+failures_counter = 0
+
+hard_match_successes = {"example":[], "all-generated-output":[]} # correct generated output + correct stopping (no hallucination) i.e. fully correct
+soft_match_successes = {"example":[], "all-generated-output":[]} # correct initial generated output BUT uncorrect stopping (hallucination)
+failures = {"example":[], "all-generated-output":[]} # completely uncorrect answer
+
+import time
+import pandas as pd
+import os
+
+os.makedirs(os.path.dirname(DIR+"eval-2-atmpt-1-results/"), exist_ok = True)
+
+checkpoint_interval = 5000
+
+hard_match_base = 0
+soft_match_base = 0
+failures_base = 0
+
+for i, example in enumerate(examples):
+
+ past = time.time()
+
+ # generating the output of the model
+ example_match = example.split("# output\n")
+ example_prompt = example_match[0] + "# output\n"
+ example_output = example_match[1]
+ example_prompt_tensor = torch.tensor(encode(example_prompt), dtype=torch.long).unsqueeze(0).to(device)
+ generated_example = decode(model.generate(example_prompt_tensor, max_new_tokens = len(example_output) + 20)[0].tolist())
+
+ generated_output = generated_example[len(example_prompt):]
+ example_code_generated_output = generated_output.split("\n\n")[0]
+ # if hard match
+ if example_code_generated_output == example_output:
+ hard_match_counter += 1
+ hard_match_successes["example"].append(example)
+ hard_match_successes["all-generated-output"].append(generated_output+"@")
+ # elif soft checking
+ elif example_code_generated_output[:len(example_output)] == example_output:
+ soft_match_counter += 1
+ soft_match_successes["example"].append(example)
+ soft_match_successes["all-generated-output"].append(generated_output+"@")
+ # else complete failure
+ else:
+ failures_counter += 1
+ failures["example"].append(example)
+ failures["all-generated-output"].append(generated_output+"@")
+
+ present = time.time()
+
+ log(f"|ITERS: {i+1} / {len(examples)} | COMP: {(i+1)/len(examples) * 100:.2f}% | RATE: {1/(present-past):.2f} it./s | SPD: {present - past :.4f} s/it.| ERT: {convert_seconds((len(examples)-i-1) * (present-past))} |", p_level = 1)
+ log(f"|hard-accuracy: {(hard_match_counter/(i+1))*100:.2f}% | soft-accuracy: {(soft_match_counter/(i+1))*100:.2f}% |", p_level = 2)
+
+ if (i+1) % checkpoint_interval == 0:
+
+ mode, header = ("w",True) if (i+1) == checkpoint_interval else ("a", False)
+
+ hard_match_successes_df = pd.DataFrame(hard_match_successes)
+ soft_match_successes_df = pd.DataFrame(soft_match_successes)
+ failures_df = pd.DataFrame(failures)
+
+ hard_match_successes_df.index = hard_match_successes_df.index + hard_match_base
+ soft_match_successes_df.index = soft_match_successes_df.index + soft_match_base
+ failures_df.index = failures_df.index + failures_base
+
+ hard_match_base = hard_match_counter
+ soft_match_base = soft_match_counter
+ failures_base = failures_counter
+
+ hard_match_successes_df.to_csv(DIR+"eval-2-atmpt-1-results/eval-2-atmpt-1-hard-match-successes.csv", mode = mode, header = header)
+ soft_match_successes_df.to_csv(DIR+"eval-2-atmpt-1-results/eval-2-atmpt-1-soft-match-successes.csv", mode = mode, header = header)
+ failures_df.to_csv(DIR+"eval-2-atmpt-1-results/eval-2-atmpt-1-failures.csv", mode = mode, header = header)
+
+ hard_match_successes = {"example":[], "all-generated-output":[]}
+ soft_match_successes = {"example":[], "all-generated-output":[]}
+ failures = {"example":[], "all-generated-output":[]}
+
+
+## Logging the metrics
+log("Logging the metrics")
+import neptune
+
+run = neptune.init_run(
+ project="younes-boukacem-workspace/tiny-lm-full-random-mode",
+ api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiJiZGFmNDg2Yy01MzRkLTQwNGMtYmZmMy1hYzM0Y2NkY2QyNmMifQ==",
+ with_id = "IMG1-72",
+ capture_hardware_metrics = False
+)
+run["eval-2/hard-accuracy-percentage"] = (hard_match_counter/len(examples))*100
+run["eval-2/soft-accuracy-percentage"] = (soft_match_counter/len(examples))*100
+
+log_file.close()
\ No newline at end of file
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/readme.md b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/readme.md
new file mode 100644
index 0000000..c941f22
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/readme.md
@@ -0,0 +1,2 @@
+# DESCRIPTION
+We train the 30M-parameter model from scratch on hodhaifa's ~200M code snippets dataset
\ No newline at end of file
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/runid.txt b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/runid.txt
new file mode 100644
index 0000000..a0dfbe8
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/runid.txt
@@ -0,0 +1 @@
+IMG1-75
\ No newline at end of file
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/train/model-training-atmpt-1.log b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/train/model-training-atmpt-1.log
new file mode 100644
index 0000000..0515f46
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/train/model-training-atmpt-1.log
@@ -0,0 +1,375 @@
+|ITERS: 400000 / 6792189 | COMP: 5.89% | RATE: 2.79 it./s | SPD: 0.3585 s/it.| ERT: (26, 12, 33, 24)
+val>|ITERS: 500 / 500 | COMP: 100.00% | RATE: 8.38 it./s | SPD: 0.1194 s/it.| ERT: (0, 0, 0, 0) |
+
+Importing ...:
+ --> took (0, 0, 0, 4)
+
+Starting the neptune logging:
+ --> neptune init
+ --> saving the runid
+
+Set the random seed for reproducibility:
+
+Set the device to GPU if available, otherwise CPU:
+ --> device set to cuda.
+
+Setting arch-hyperparams for the GPT model:
+
+Loading the training and evaluation data:
+ --> train.bin
+ --> took (0, 0, 2, 53)
+ --> val.bin
+ --> took (0, 0, 1, 31)
+
+Setting train-hyperparams and util variables:
+ --> max_iters = 6792189
+
+Defining the model and utilities:
+
+The model:
+ --> def get random batch of data
+ --> def estimate loss
+ --> def human readable
+
+Loading the meta object:
+ --> loading
+ --> setting vocab size
+
+Creating the model:
+
+The model has 30M trainable parameters:
+
+Preparing for the training loop:
+ --> initializing the optimizer
+ --> initializing the learning rate scheduler
+ --> computing the initial loss
+ --> saving the last loss for early stopping
+ --> neptune logging the initial loss
+
+Training loop:
+
+==========================================================================================:
+
+2024-10-02_03-21 : iter 0 <=> epoch 0 | train loss 4.1529 | val loss 4.1522:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_03-52 : iter 5000 <=> epoch 0.002208418986214694 | train loss 0.6571505666 | val loss 0.6579629183:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_04-25 : iter 10000 <=> epoch 0.004416837972429388 | train loss 0.6380867362 | val loss 0.6404500604:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_04-56 : iter 15000 <=> epoch 0.006625256958644082 | train loss 0.6334573627 | val loss 0.6319733858:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_05-28 : iter 20000 <=> epoch 0.008833675944858775 | train loss 0.6256970167 | val loss 0.6267494559:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_06-00 : iter 25000 <=> epoch 0.01104209493107347 | train loss 0.6232506037 | val loss 0.6239672303:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_06-31 : iter 30000 <=> epoch 0.013250513917288164 | train loss 0.6182540655 | val loss 0.6192226410:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_07-03 : iter 35000 <=> epoch 0.01545893290350286 | train loss 0.6181470156 | val loss 0.6167954206:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_07-35 : iter 40000 <=> epoch 0.01766735188971755 | train loss 0.6143714786 | val loss 0.6150822043:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_08-06 : iter 45000 <=> epoch 0.019875770875932248 | train loss 0.6149205565 | val loss 0.6152332425:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_08-38 : iter 50000 <=> epoch 0.02208418986214694 | train loss 0.6136812568 | val loss 0.6140612364:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_09-10 : iter 55000 <=> epoch 0.024292608848361635 | train loss 0.6113308072 | val loss 0.6127268076:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_09-41 : iter 60000 <=> epoch 0.026501027834576328 | train loss 0.6131508350 | val loss 0.6124062538:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_10-13 : iter 65000 <=> epoch 0.02870944682079102 | train loss 0.6119241118 | val loss 0.6104813814:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_10-44 : iter 70000 <=> epoch 0.03091786580700572 | train loss 0.6094288826 | val loss 0.6097817421:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_11-16 : iter 75000 <=> epoch 0.03312628479322041 | train loss 0.6084360480 | val loss 0.6100493073:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_11-47 : iter 80000 <=> epoch 0.0353347037794351 | train loss 0.6091048717 | val loss 0.6084110737:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_12-19 : iter 85000 <=> epoch 0.0375431227656498 | train loss 0.6080473661 | val loss 0.6088900566:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_12-51 : iter 90000 <=> epoch 0.039751541751864496 | train loss 0.6065958142 | val loss 0.6091149449:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_13-22 : iter 95000 <=> epoch 0.041959960738079186 | train loss 0.6067644358 | val loss 0.6073774695:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_13-54 : iter 100000 <=> epoch 0.04416837972429388 | train loss 0.6087557077 | val loss 0.6070531011:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_14-26 : iter 105000 <=> epoch 0.04637679871050857 | train loss 0.6064656973 | val loss 0.6076139808:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_14-57 : iter 110000 <=> epoch 0.04858521769672327 | train loss 0.6067269444 | val loss 0.6059607267:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_15-29 : iter 115000 <=> epoch 0.050793636682937966 | train loss 0.6051300168 | val loss 0.6057843566:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_16-01 : iter 120000 <=> epoch 0.053002055669152656 | train loss 0.6054717898 | val loss 0.6063292623:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_16-33 : iter 125000 <=> epoch 0.05521047465536735 | train loss 0.6061344743 | val loss 0.6057439446:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_17-04 : iter 130000 <=> epoch 0.05741889364158204 | train loss 0.6056591272 | val loss 0.6078667045:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_17-36 : iter 135000 <=> epoch 0.05962731262779674 | train loss 0.6042066813 | val loss 0.6042064428:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_18-08 : iter 140000 <=> epoch 0.06183573161401144 | train loss 0.6045489907 | val loss 0.6054960489:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_18-39 : iter 145000 <=> epoch 0.06404415060022613 | train loss 0.6060239077 | val loss 0.6045843959:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_19-11 : iter 150000 <=> epoch 0.06625256958644082 | train loss 0.6052364111 | val loss 0.6045990586:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_19-43 : iter 155000 <=> epoch 0.06846098857265552 | train loss 0.6037033796 | val loss 0.6042101979:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_20-14 : iter 160000 <=> epoch 0.0706694075588702 | train loss 0.6028453708 | val loss 0.6030650139:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_20-46 : iter 165000 <=> epoch 0.0728778265450849 | train loss 0.6033636928 | val loss 0.6030762196:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_21-18 : iter 170000 <=> epoch 0.0750862455312996 | train loss 0.6033759713 | val loss 0.6036798358:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_21-49 : iter 175000 <=> epoch 0.0772946645175143 | train loss 0.6030404568 | val loss 0.6033511758:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_22-21 : iter 180000 <=> epoch 0.07950308350372899 | train loss 0.6044967771 | val loss 0.6030460596:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_22-53 : iter 185000 <=> epoch 0.08171150248994367 | train loss 0.6035633087 | val loss 0.6050740480:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_23-24 : iter 190000 <=> epoch 0.08391992147615837 | train loss 0.6038053632 | val loss 0.6032382846:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-02_23-56 : iter 195000 <=> epoch 0.08612834046237307 | train loss 0.6042562723 | val loss 0.6038897038:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_00-28 : iter 200000 <=> epoch 0.08833675944858776 | train loss 0.6017351747 | val loss 0.6056787372:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_00-59 : iter 205000 <=> epoch 0.09054517843480246 | train loss 0.6015030742 | val loss 0.6030330658:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_01-31 : iter 210000 <=> epoch 0.09275359742101714 | train loss 0.6015931368 | val loss 0.6033543944:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_02-03 : iter 215000 <=> epoch 0.09496201640723184 | train loss 0.6019971967 | val loss 0.6033992171:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_02-34 : iter 220000 <=> epoch 0.09717043539344654 | train loss 0.6018151641 | val loss 0.6013380289:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_03-06 : iter 225000 <=> epoch 0.09937885437966124 | train loss 0.6036648154 | val loss 0.6036788225:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_03-38 : iter 230000 <=> epoch 0.10158727336587593 | train loss 0.6032592654 | val loss 0.6019942760:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_04-10 : iter 235000 <=> epoch 0.10379569235209062 | train loss 0.6024428010 | val loss 0.6012475491:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_04-41 : iter 240000 <=> epoch 0.10600411133830531 | train loss 0.6012635827 | val loss 0.6030955911:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_05-13 : iter 245000 <=> epoch 0.10821253032452001 | train loss 0.6025944948 | val loss 0.6043007374:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_05-45 : iter 250000 <=> epoch 0.1104209493107347 | train loss 0.6028639674 | val loss 0.6026862860:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_06-16 : iter 255000 <=> epoch 0.1126293682969494 | train loss 0.6010313034 | val loss 0.6015847325:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_06-48 : iter 260000 <=> epoch 0.11483778728316409 | train loss 0.6020066738 | val loss 0.6025670767:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_07-20 : iter 265000 <=> epoch 0.11704620626937878 | train loss 0.6015477777 | val loss 0.6017661095:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_07-51 : iter 270000 <=> epoch 0.11925462525559348 | train loss 0.6013019085 | val loss 0.6016370058:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_08-23 : iter 275000 <=> epoch 0.12146304424180818 | train loss 0.6016970277 | val loss 0.6008374095:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_08-55 : iter 280000 <=> epoch 0.12367146322802287 | train loss 0.6014593244 | val loss 0.6020722985:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_09-26 : iter 285000 <=> epoch 0.12587988221423757 | train loss 0.6010342836 | val loss 0.6017381549:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_09-58 : iter 290000 <=> epoch 0.12808830120045225 | train loss 0.6010420322 | val loss 0.6019151211:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_10-30 : iter 295000 <=> epoch 0.13029672018666694 | train loss 0.6023580432 | val loss 0.6013076305:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_11-01 : iter 300000 <=> epoch 0.13250513917288165 | train loss 0.5998196006 | val loss 0.6012403369:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_11-33 : iter 305000 <=> epoch 0.13471355815909633 | train loss 0.6001894474 | val loss 0.6017575264:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_12-05 : iter 310000 <=> epoch 0.13692197714531104 | train loss 0.6002645493 | val loss 0.6019032598:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_12-36 : iter 315000 <=> epoch 0.13913039613152572 | train loss 0.6006474495 | val loss 0.6008545756:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_13-08 : iter 320000 <=> epoch 0.1413388151177404 | train loss 0.6029533744 | val loss 0.6005489230:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_13-40 : iter 325000 <=> epoch 0.14354723410395512 | train loss 0.6018894911 | val loss 0.6025612354:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_14-11 : iter 330000 <=> epoch 0.1457556530901698 | train loss 0.6008696556 | val loss 0.6000924110:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_14-43 : iter 335000 <=> epoch 0.1479640720763845 | train loss 0.6011112928 | val loss 0.6019214392:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_15-15 : iter 340000 <=> epoch 0.1501724910625992 | train loss 0.6003690958 | val loss 0.6016303897:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_15-46 : iter 345000 <=> epoch 0.15238091004881388 | train loss 0.5997262597 | val loss 0.6024017930:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_16-18 : iter 350000 <=> epoch 0.1545893290350286 | train loss 0.6012148857 | val loss 0.6013091803:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_16-50 : iter 355000 <=> epoch 0.15679774802124327 | train loss 0.6021685600 | val loss 0.6004719734:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_17-21 : iter 360000 <=> epoch 0.15900616700745798 | train loss 0.6010605097 | val loss 0.6013142467:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_17-53 : iter 365000 <=> epoch 0.16121458599367267 | train loss 0.6015254855 | val loss 0.6015218496:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_18-25 : iter 370000 <=> epoch 0.16342300497988735 | train loss 0.6009197235 | val loss 0.6010703444:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_18-56 : iter 375000 <=> epoch 0.16563142396610206 | train loss 0.6014112830 | val loss 0.6005527377:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_19-29 : iter 380000 <=> epoch 0.16783984295231674 | train loss 0.5985925198 | val loss 0.6005563736:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_20-00 : iter 385000 <=> epoch 0.17004826193853145 | train loss 0.6014177799 | val loss 0.5993712544:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_20-32 : iter 390000 <=> epoch 0.17225668092474614 | train loss 0.6006208062 | val loss 0.6011231542:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_21-04 : iter 395000 <=> epoch 0.17446509991096082 | train loss 0.5998015404 | val loss 0.6012008786:
+ --> training ...
+ --> checkpointing ...
+
+2024-10-03_21-36 : iter 400000 <=> epoch 0.17667351889717553 | train loss 0.5984551311 | val loss 0.6013513803:
+
+EARLY STOPPING at iter 400000 == epoch 0.17667351889717553:
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/train/model-training-atmpt-1.py b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/train/model-training-atmpt-1.py
new file mode 100644
index 0000000..d013be7
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-5/train/model-training-atmpt-1.py
@@ -0,0 +1,419 @@
+# Model training
+
+## ATMPT-1 (on Greene)
+DIR = "/scratch/yb2618/tiny-lm-full-random-mode/ae-5/"
+DDIR = "/scratch/hb3020/training/60M-model-200M-data/training/"
+
+## Logging boilerplate
+log_file = open(DIR+"model-training-atmpt-1.log", "w")
+# progress bar reception string
+pbar_recept_string = " " * 200 + "\n"
+log_file.write(pbar_recept_string)
+log_file.write(pbar_recept_string)
+log_file.flush()
+def log(s:str, p_level=None):
+ if p_level == 1:
+ log_file.seek(0,0)
+ log_file.write(pbar_recept_string)
+ log_file.seek(0,0)
+ log_file.write(s)
+ log_file.seek(0,2)
+ elif p_level == 2:
+ log_file.seek(len(pbar_recept_string), 0)
+ log_file.write(pbar_recept_string)
+ log_file.seek(len(pbar_recept_string), 0)
+ log_file.write(s)
+ log_file.seek(0,2)
+ else:
+ if s[0].upper() == s[0]:
+ start = "\n"
+ end = ":"
+ else:
+ start = " --> "
+ end = ""
+ log_file.write(start + s + end + "\n")
+ log_file.flush()
+
+## Convert seconds to days, hours, minutes, seconds
+def convert_seconds(seconds:float):
+ # ignoring the sub seconds
+ seconds = int(seconds)
+ days, seconds = divmod(seconds, 86400)
+ hours, seconds = divmod(seconds, 3600)
+ minutes, seconds = divmod(seconds, 60)
+ return (days, hours, minutes, seconds)
+
+log("Importing ...")
+import time
+
+before = time.time()
+import random
+import datetime
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+after = time.time()
+log(f"took {convert_seconds(after - before)}")
+
+## Starting the neptune logging
+log("Starting the neptune logging")
+log("neptune init")
+import neptune
+run = neptune.init_run(
+ project="younes-boukacem-workspace/tiny-lm-full-random-mode",
+ api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiJiZGFmNDg2Yy01MzRkLTQwNGMtYmZmMy1hYzM0Y2NkY2QyNmMifQ==",
+ tags = ["30M-model", "~200M-code-snippets"],
+ description = "We train from scratch the 30M parameters model on hodhaifa ~ 200M dataset with the full loading of the data into ram for acceleration ",
+ capture_hardware_metrics = False,
+)
+# First attempt so we log the runid
+log("saving the runid")
+runid = run["sys/id"].fetch()
+with open(DIR + "runid.txt", "w") as f:
+ f.write(runid)
+
+
+## Set the random seed for reproducibility
+log("Set the random seed for reproducibility")
+seed = 42
+torch.manual_seed(seed)
+random.seed(seed)
+np.random.seed(seed)
+
+
+## Set the device to GPU if available, otherwise CPU
+log("Set the device to GPU if available, otherwise CPU")
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+log(f"device set to {device}.")
+
+
+## Setting arch-hyperparams for the GPT model
+log("Setting arch-hyperparams for the GPT model")
+run["arch-hyperparams/block_size"] = block_size = 256 # Maximum context length
+run["arch-hyperparams/n_embd"] = n_embd = 642 # Embedding dimension
+run["arch-hyperparams/n_head"] = n_head = 6 # Number of attention heads
+run["arch-hyperparams/n_layer"] = n_layer = 6 # Number of transformer blocks
+
+## Loading the training and evaluation data
+log("Loading the training and evaluation data")
+log("train.bin")
+before = time.time()
+train_data = np.memmap(DDIR+"train.bin", dtype = np.uint16, mode="r")
+train_data = np.array(train_data)
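+# converting the memmap to a regular array loads the whole split into RAM,
+# trading memory for faster random batch access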
+after = time.time()
+log(f"took {convert_seconds(after - before)}")
+
+log("val.bin")
+before = time.time()
+val_data = np.memmap(DDIR+"val.bin", dtype = np.uint16, mode="r")
+val_data = np.array(val_data)
+after = time.time()
+log(f"took {convert_seconds(after - before)}")
+
+## Setting the train-hyperparams and util variables
+log("Setting train-hyperparams and util variables")
+run["train-hyperparams/batch_size"] = batch_size = 64 # Batch size for training
+run["train-hyperparams/dropout"] = dropout = 0 # Dropout rate
+run["train-hyperparms/max_pseudo_epochs"] = max_pseudo_epochs = 3
+run["train-hyperparams/learning_rate"] = learning_rate = 1e-3 # Initial Learning rate value
+run["train-hypeparams/max_degradations"] = max_degradations = 3 # number of consecutive degradations on val loss before stoping the training
+eval_interval = 5000 # Evaluation interval
+eval_iters = 500 # Number of iterations for evaluation
+max_iters = int( ( max_pseudo_epochs * len(train_data) ) / ( batch_size * block_size ) )
+log(f"max_iters = {max_iters}")
+miles = [int(max_iters * m) for m in [0.7, 0.8, 0.9]] # Milestones for learning rate decay as fractions of max_iters
+run["train-hyperparams/miles"] = str(miles)
+
+
+
+compile = False # requires PyTorch 2.0
+
+## Defining the model and utilities
+log("Defining the model and utilities")
+log("The model")
+class LayerNorm(nn.Module):
+ """ LayerNorm but with an optional bias. PyTorch doesn't support simply bias=False """
+
+ def __init__(self, ndim, bias):
+ super().__init__()
+ self.weight = nn.Parameter(torch.ones(ndim))
+ self.bias = nn.Parameter(torch.zeros(ndim)) if bias else None
+
+ def forward(self, input):
+ return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+
+class Head(nn.Module):
+ """One head of self-attention."""
+
+ def __init__(self, head_size):
+ super().__init__()
+ self.key = nn.Linear(n_embd, head_size, bias=False)
+ self.query = nn.Linear(n_embd, head_size, bias=False)
+ self.value = nn.Linear(n_embd, head_size, bias=False)
+ self.flash = hasattr(torch.nn.functional, 'scaled_dot_product_attention')
+
+ self.dropout = nn.Dropout(dropout)
+
+ def forward(self, x):
+ B,T,C = x.shape
+        k = self.key(x)   # (B, T, head_size)
+        q = self.query(x) # (B, T, head_size)
+ v = self.value(x)
+
+ out = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=None, dropout_p=dropout if self.training else 0, is_causal=True)
+
+ return out
+
+class MultiHeadAttention(nn.Module):
+ """multiple heads of self-attention in parallel."""
+
+ def __init__(self, num_heads, head_size):
+ super().__init__()
+ self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
+ self.proj = nn.Linear(n_embd, n_embd)
+ self.dropout = nn.Dropout(dropout)
+
+ def forward(self, x):
+ out = torch.cat([h(x) for h in self.heads], dim=-1)
+ out = self.dropout(self.proj(out))
+ return out
+
+class FeedForward(nn.Module):
+ """ a simple linear layer followed by a non-linearity."""
+
+ def __init__(self, n_embd):
+ super().__init__()
+ self.net = nn.Sequential(
+ nn.Linear(n_embd, 4 * n_embd, bias=False),
+ nn.GELU(),
+ nn.Linear( 4 * n_embd, n_embd, bias=False),
+ nn.Dropout(dropout),
+ )
+
+ def forward(self, x):
+ return self.net(x)
+
+class Block(nn.Module):
+ """ Transformer block: communication followed by feedforward."""
+
+ def __init__(self, n_embd, n_head):
+ super().__init__()
+ head_size = n_embd // n_head
+ self.sa = MultiHeadAttention(n_head, head_size)
+ self.ffwd = FeedForward(n_embd)
+ self.ln1 = nn.LayerNorm(n_embd, bias=False)
+ self.ln2 = nn.LayerNorm(n_embd, bias=False)
+
+ def forward(self, x):
+ x = x + self.sa(self.ln1(x))
+ x = x + self.ffwd(self.ln2(x))
+ return x
+
+class GPT(nn.Module):
+
+ def __init__(self):
+ super().__init__()
+ # each token directly reads off the logits for the next token from a lookup table
+ self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
+ self.position_embedding_table = nn.Embedding(block_size, n_embd)
+ self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
+ self.ln_f = nn.LayerNorm(n_embd, bias=False)
+ self.lm_head = nn.Linear(n_embd, vocab_size)
+
+ def forward(self, idx, targets=None):
+ B, T = idx.shape
+
+ # idx and targets are both (B,T) tensor of integers
+ tok_emb = self.token_embedding_table(idx) # (B,T,C)
+ pos_emb = self.position_embedding_table(torch.arange(T, device=device)) # (T,C)
+ x = tok_emb + pos_emb # (B,T,C)
+ x = self.blocks(x) # (B,T,C)
+ x = self.ln_f(x) # (B,T,C)
+ logits = self.lm_head(x) # (B,T,vocab_size)
+
+ if targets is None:
+ loss = None
+ else:
+ B, T, C = logits.shape
+ logits = logits.view(B*T, C)
+ targets = targets.view(B*T)
+ loss = F.cross_entropy(logits, targets)
+
+ return logits, loss
+
+    @torch.no_grad()
+    def generate(self, idx, max_new_tokens):
+ # idx is (B, T) array of indices in the current context
+ for _ in range(max_new_tokens):
+ # crop idx to the last block_size tokens
+ idx_cond = idx[:, -block_size:] # (B, T)
+ # get the predictions
+ logits, loss = self(idx_cond)
+ # focus only on the last time step
+ logits = logits[:, -1, :] # becomes (B, C)
+ # apply softmax to get probabilities
+ probs = F.softmax(logits, dim=-1) # (B, C)
+            # greedy decoding: take the most likely next token
+            _, idx_next = torch.max(probs, dim=1, keepdim=True) # (B, 1)
+            #idx_next = torch.multinomial(probs, num_samples=1) # alternative: sample from the distribution
+ # append sampled index to the running sequence
+ idx = torch.cat((idx, idx_next), dim=1) # (B, T+1)
+ return idx
+
+# get random batch of data
+log("def get random batch of data")
+def get_batch(split):
+ data = train_data if split == 'train' else val_data
+ ix = torch.randint(len(data) - block_size, (batch_size,))
+ x = torch.stack([torch.from_numpy((data[i:i+block_size]).astype(np.int64)) for i in ix])
+ y = torch.stack([torch.from_numpy((data[i+1:i+1+block_size]).astype(np.int64)) for i in ix])
+ x, y = x.to(device), y.to(device)
+ return x, y
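+# x is a batch of random contiguous block_size windows from the chosen split;
+# y is x shifted one token to the right, the standard next-token-prediction targets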
+
+# estimate loss on train and val splits
+log("def estimate loss")
+@torch.no_grad()
+def estimate_loss():
+ out = {}
+ model.eval()
+ for split in ['train', 'val']:
+ losses = torch.zeros(eval_iters)
+ for k in range(eval_iters):
+ past = time.time()
+ X, Y = get_batch(split)
+ logits, loss = model(X, Y)
+ losses[k] = loss.item()
+ present = time.time()
+ log(f"{split}>|ITERS: {k+1} / {eval_iters} | COMP: {(k+1)/eval_iters * 100:.2f}% | RATE: {1/(present-past):.2f} it./s | SPD: {present - past :.4f} s/it.| ERT: {convert_seconds((eval_iters-k-1) * (present-past))} |", p_level = 2)
+ out[split] = losses.mean()
+ model.train()
+ return out
+
+# helper function to make large numbers of parameters human-readable
+log("def human readable")
+def human_readable(num):
+ magnitude = 0
+ while abs(num) >= 1000:
+ magnitude += 1
+ num /= 1000.0
+ return '%.0f%s' % (num, ['', 'K', 'M', 'G', 'T', 'P'][magnitude])
+
+
+## Loading the meta object
+log("Loading the meta object")
+log("loading")
+import pickle
+with open(DDIR+"meta.pkl", "rb") as f:
+ meta = pickle.load(f)
+log("setting vocab size")
+vocab_size = meta["vocab_size"]
+
+
+## Creating a new model
+log("Creating the model")
+model = GPT()
+m = model.to(device)
+num_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
+if compile:
+ print("compiling the model... (takes a ~minute)")
+ model = torch.compile(model)
+num_parameters_hr = human_readable(num_parameters)
+log(f'The model has {num_parameters_hr} trainable parameters')
+
+
+## Preparing for the training loop
+log("Preparing for the training loop")
+
+# initializing the optimizer
+log("initialiazing the optimizer")
+optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
+
+# initializing the learning rate scheduler
+log("initializing the learing rate scheduler")
+scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=miles, gamma=0.1)
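+# MultiStepLR multiplies the learning rate by gamma=0.1 at each milestone in
+# `miles` (70%, 80% and 90% of max_iters): 1e-3 -> 1e-4 -> 1e-5 -> 1e-6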
+
+
+# computing the initial loss
+log("computing the initial loss")
+losses = estimate_loss()
+
+# saving the last_losses for early stopping
+log("saving the last loss for early stopping")
+last_losses = losses
+best_val_loss = losses["val"]
+
+# neptune logging the initial loss
+log("neptune logging the initial loss")
+run["losses_trace/train_loss"].append(losses["train"], step = 0)
+run["losses_trace/val_loss"].append(losses["val"], step = 0)
+
+
+## Training loop
+log("Training loop")
+
+import os
+os.makedirs(os.path.dirname(DIR+"checkpoints/"), exist_ok=True)
+
+log("==========================================================================================")
+early_stopping = {"state": False, "iter": None, "epoch": None}
+now = datetime.datetime.now()
+date_hour = now.strftime("%Y-%m-%d_%H-%M")
+log(f'{date_hour} : iter {0:5d} <=> epoch 0 | train loss {losses["train"]:.4f} | val loss {losses["val"]:.4f}')
+nb_degradations = 0
+
+
+log("training ...")
+for iter in range(max_iters):
+ past = time.time()
+ # train the model for one iteration
+ xb, yb = get_batch('train')
+ # forward pass
+ logits, loss = model(xb, yb)
+ optimizer.zero_grad(set_to_none=True)
+ loss.backward()
+ optimizer.step()
+ # Step the scheduler
+ scheduler.step()
+
+ present = time.time()
+ log(f"|ITERS: {iter+1} / {max_iters} | COMP: {(iter+1)/max_iters * 100:.2f}% | RATE: {1/(present-past):.2f} it./s | SPD: {present - past :.4f} s/it.| ERT: {convert_seconds((max_iters-iter-1) * (present-past))}", p_level = 1)
+
+ # evaluate the model on the train and val splits and log the losses
+ if (iter+1) % eval_interval == 0:
+ log("checkpointing ...")
+        epoch = (block_size * batch_size * (iter+1))/len(train_data) # fraction of training tokens consumed so far (pseudo-epoch)
+ losses = estimate_loss()
+ now = datetime.datetime.now()
+ date_hour = now.strftime("%Y-%m-%d_%H-%M")
+ log(f'{date_hour} : iter {iter+1:10d} <=> epoch {epoch} | train loss {losses["train"]:.10f} | val loss {losses["val"]:.10f}')
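+        # early stopping: count consecutive evaluations where the val loss got
+        # worse than at the previous evaluation; max_degradations in a row aborts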
+ if losses["val"] > last_losses["val"]:
+ nb_degradations += 1
+ if nb_degradations == max_degradations:
+ log(f"EARLY STOPPING at iter {iter+1} == epoch {epoch}")
+ early_stopping = {"state": True, "iter": iter+1, "epoch": epoch}
+ break
+ else:
+ nb_degradations = 0
+
+ # Logging the losses trace
+ run["losses_trace/train_loss"].append(losses["train"], step = epoch)
+ run["losses_trace/val_loss"].append(losses["val"], step = epoch)
+
+ # Saving the last_losses
+ last_losses = losses
+
+ # Saving the model
+ now = datetime.datetime.now()
+ date_hour = now.strftime("%Y-%m-%d_%H-%M")
+ torch.save(model.state_dict(), f"{DIR}checkpoints/checkpoint_iter{iter+1}_epoch{epoch:.2f}_{date_hour}.pth")
+ if losses["val"] < best_val_loss:
+ best_val_loss = losses["val"]
+ torch.save(model.state_dict(), f"{DIR}best-model.pth")
+ with open(f"{DIR}best-model.info", "w") as f:
+ f.write(f"iter : {iter+1}\n")
+ f.write(f"epoch : {epoch}\n")
+ f.write(f"date-hour : {date_hour}\n")
+ log("training ...")
+
+run["early_stopping"] = early_stopping
+log_file.close()
\ No newline at end of file
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-6/evals/eval-1/eval-1-atmpt-1-slurm-51582619.out b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-6/evals/eval-1/eval-1-atmpt-1-slurm-51582619.out
new file mode 100644
index 0000000..1b5341b
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-6/evals/eval-1/eval-1-atmpt-1-slurm-51582619.out
@@ -0,0 +1,3 @@
+/scratch/yb2618/tiny-lm-full-random-mode/ae-6/eval-1-atmpt-1.py:226: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
+ model.load_state_dict(torch.load(DIR+"best_model_iter385000_epoch0.17_2024-10-02_02-01.pth"))
+slurmstepd: error: *** JOB 51582619 ON gr004 CANCELLED AT 2024-10-02T22:33:05 ***
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-6/evals/eval-1/eval-1-atmpt-1.log b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-6/evals/eval-1/eval-1-atmpt-1.log
new file mode 100644
index 0000000..6d4d775
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-6/evals/eval-1/eval-1-atmpt-1.log
@@ -0,0 +1,20 @@
+|ITERS: 129191 / 20000000 | COMP: 0.65% | RATE: 3.90 it./s | SPD: 0.2567 s/it.| ERT: (59, 1, 2, 38) |
+|hard-accuracy: 24.50% | soft-accuracy: 0.92% |
+
+Imports:
+
+Set the device to GPU if available, otherwise CPU:
+
+Loading the encode and decode functions and setting the vocab_size from the meta object:
+
+Redefining the model used during training:
+
+Loading the model:
+
+Creating and loading the model:
+
+Reading the test data:
+
+Splitting the test data into examples:
+
+Sequential Evaluation loop:
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-6/evals/eval-1/eval-1-atmpt-1.py b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-6/evals/eval-1/eval-1-atmpt-1.py
new file mode 100644
index 0000000..5edc971
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-6/evals/eval-1/eval-1-atmpt-1.py
@@ -0,0 +1,336 @@
+#@ EVAL-1-ATMPT-1
+#@ We evaluate the 60M model on hodhaifa's generated test.txt for the ~200M code snippets dataset
+
+## On Greene
+DIR = "/scratch/yb2618/tiny-lm-full-random-mode/ae-6/"
+DDIR = "/scratch/hb3020/training/60M-model-200M-data/training/"
+DDIR2 = "/scratch/hb3020/training/60M-model-200M-data/"
+
+## Logging boilerplate
+log_file = open(DIR+"eval-1-atmpt-1.log","w")
+pbar_recept_string = " " * 200 + "\n"
+log_file.write(pbar_recept_string)
+log_file.write(pbar_recept_string)
+log_file.flush()
+def log(s:str, p_level=None):
+ if p_level == 1:
+ log_file.seek(0,0)
+ log_file.write(pbar_recept_string)
+ log_file.seek(0,0)
+ log_file.write(s)
+ log_file.seek(0,2)
+ elif p_level == 2:
+ log_file.seek(len(pbar_recept_string), 0)
+ log_file.write(pbar_recept_string)
+ log_file.seek(len(pbar_recept_string), 0)
+ log_file.write(s)
+ log_file.seek(0,2)
+ else:
+ if s[0].upper() == s[0]:
+ start = "\n"
+ end = ":"
+ else:
+ start = " --> "
+ end = ""
+ log_file.write(start + s + end + "\n")
+ log_file.flush()
+
+
+## Convert seconds to days, hours, minutes, seconds
+def convert_seconds(seconds:float):
+ # ignoring the sub seconds
+ seconds = int(seconds)
+ days, seconds = divmod(seconds, 86400)
+ hours, seconds = divmod(seconds, 3600)
+ minutes, seconds = divmod(seconds, 60)
+ return (days, hours, minutes, seconds)
+
+
+## Imports
+log("Imports")
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+## Set the device to GPU if available, otherwise CPU
+log("Set the device to GPU if available, otherwise CPU")
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+print(f"Device set to {device}.")
+
+
+## Loading the encode and decode functions and setting the vocab_size from the meta object
+log("Loading the encode and decode functions and setting the vocab_size from the meta object")
+import pickle
+
+with open(DDIR+"meta.pkl", "rb") as f:
+ meta = pickle.load(f)
+stoi = meta["stoi"]
+itos = meta["itos"]
+
+def encode(s):
+ return [stoi[c] for c in s] # encoder: take a string, output a list of integers
+
+def decode(l):
+ return ''.join([itos[i] for i in l]) # decoder: take a list of integers, output a string
+
+vocab_size = meta["vocab_size"]
+
+
+## Redefining the model used during training
+log("Redefining the model used during training")
+block_size = 256 # Maximum context length
+n_embd = 912 # Embedding dimension
+n_head = 6 # Number of attention heads
+n_layer = 6 # Number of transformer blocks
+dropout = 0 # Dropout rate
+batch_size = 64 # Batch size for training
+
+
+## Loading the model
+log("Loading the model")
+class LayerNorm(nn.Module):
+ """ LayerNorm but with an optional bias. PyTorch doesn't support simply bias=False """
+
+ def __init__(self, ndim, bias):
+ super().__init__()
+ self.weight = nn.Parameter(torch.ones(ndim))
+ self.bias = nn.Parameter(torch.zeros(ndim)) if bias else None
+
+ def forward(self, input):
+ return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+
+class Head(nn.Module):
+ """One head of self-attention."""
+
+ def __init__(self, head_size):
+ super().__init__()
+ self.key = nn.Linear(n_embd, head_size, bias=False)
+ self.query = nn.Linear(n_embd, head_size, bias=False)
+ self.value = nn.Linear(n_embd, head_size, bias=False)
+ self.flash = hasattr(torch.nn.functional, 'scaled_dot_product_attention')
+
+ self.dropout = nn.Dropout(dropout)
+
+ def forward(self, x):
+ B,T,C = x.shape
+        k = self.key(x)   # (B, T, head_size)
+        q = self.query(x) # (B, T, head_size)
+ v = self.value(x)
+
+ out = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=None, dropout_p=dropout if self.training else 0, is_causal=True)
+
+ return out
+
+class MultiHeadAttention(nn.Module):
+ """multiple heads of self-attention in parallel."""
+
+ def __init__(self, num_heads, head_size):
+ super().__init__()
+ self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
+ self.proj = nn.Linear(n_embd, n_embd)
+ self.dropout = nn.Dropout(dropout)
+
+ def forward(self, x):
+ out = torch.cat([h(x) for h in self.heads], dim=-1)
+ out = self.dropout(self.proj(out))
+ return out
+
+class FeedForward(nn.Module):
+ """ a simple linear layer followed by a non-linearity."""
+
+ def __init__(self, n_embd):
+ super().__init__()
+ self.net = nn.Sequential(
+ nn.Linear(n_embd, 4 * n_embd, bias=False),
+ nn.GELU(),
+ nn.Linear( 4 * n_embd, n_embd, bias=False),
+ nn.Dropout(dropout),
+ )
+
+ def forward(self, x):
+ return self.net(x)
+
+class Block(nn.Module):
+ """ Transformer block: communication followed by feedforward."""
+
+ def __init__(self, n_embd, n_head):
+ super().__init__()
+ head_size = n_embd // n_head
+ self.sa = MultiHeadAttention(n_head, head_size)
+ self.ffwd = FeedForward(n_embd)
+ self.ln1 = nn.LayerNorm(n_embd, bias=False)
+ self.ln2 = nn.LayerNorm(n_embd, bias=False)
+
+ def forward(self, x):
+ x = x + self.sa(self.ln1(x))
+ x = x + self.ffwd(self.ln2(x))
+ return x
+
+class GPT(nn.Module):
+
+ def __init__(self):
+ super().__init__()
+ # each token directly reads off the logits for the next token from a lookup table
+ self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
+ self.position_embedding_table = nn.Embedding(block_size, n_embd)
+ self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
+ self.ln_f = nn.LayerNorm(n_embd, bias=False)
+ self.lm_head = nn.Linear(n_embd, vocab_size)
+
+ def forward(self, idx, targets=None):
+ B, T = idx.shape
+
+ # idx and targets are both (B,T) tensor of integers
+ tok_emb = self.token_embedding_table(idx) # (B,T,C)
+ pos_emb = self.position_embedding_table(torch.arange(T, device=device)) # (T,C)
+ x = tok_emb + pos_emb # (B,T,C)
+ x = self.blocks(x) # (B,T,C)
+ x = self.ln_f(x) # (B,T,C)
+ logits = self.lm_head(x) # (B,T,vocab_size)
+
+ if targets is None:
+ loss = None
+ else:
+ B, T, C = logits.shape
+ logits = logits.view(B*T, C)
+ targets = targets.view(B*T)
+ loss = F.cross_entropy(logits, targets)
+
+ return logits, loss
+
+ @torch.no_grad()
+ def generate(self, idx, max_new_tokens):
+ # idx is (B, T) array of indices in the current context
+ for _ in range(max_new_tokens):
+ # crop idx to the last block_size tokens
+ idx_cond = idx[:, -block_size:] # (B, T)
+ # get the predictions
+ logits, loss = self(idx_cond)
+ # focus only on the last time step
+ logits = logits[:, -1, :] # becomes (B, C)
+ # apply softmax to get probabilities
+ probs = F.softmax(logits, dim=-1) # (B, C)
+            # greedy decoding: take the most probable next token
+            _, idx_next = torch.max(probs, dim=1, keepdim=True) # (B, 1)
+            #idx_next = torch.multinomial(probs, num_samples=1) # stochastic sampling alternative
+            # append the chosen index to the running sequence
+ idx = torch.cat((idx, idx_next), dim=1) # (B, T+1)
+ return idx
+
+
+## Creating and loading the model
+log("Creating and loading the model")
+model = GPT()
+model.eval()
+model.load_state_dict(torch.load(DIR+"best_model_iter385000_epoch0.17_2024-10-02_02-01.pth", map_location=device))
+model.to(device)
+
+
+## Reading the test data
+log("Reading the test data")
+with open(DDIR2+"test.txt", "r") as f:
+ test_data = f.read()
+
+
+## Splitting the test data into examples
+log("Splitting the test data into examples")
+examples = test_data.split("\n\n")
+examples = examples[:]  # keep all examples; narrow this slice to sub-sample for quick runs
+
+
+## Sequential Evaluation loop
+log("Sequential Evaluation loop")
+hard_match_counter = 0
+soft_match_counter = 0
+failures_counter = 0
+
+hard_match_successes = {"example":[], "all-generated-output":[]} # correct generated output + correct stopping (no hallucination), i.e. fully correct
+soft_match_successes = {"example":[], "all-generated-output":[]} # correct initial generated output BUT incorrect stopping (hallucination)
+failures = {"example":[], "all-generated-output":[]} # completely incorrect answer
+
+import time
+import pandas as pd
+import os
+
+os.makedirs(DIR+"eval-1-atmpt-1-results/", exist_ok = True)  # makedirs accepts the directory path directly
+
+checkpoint_interval = 5000
+
+hard_match_base = 0
+soft_match_base = 0
+failures_base = 0
+
+for i, example in enumerate(examples):
+
+ past = time.time()
+
+ # generating the output of the model
+ example_match = example.split("# output\n")
+ example_prompt = example_match[0] + "# output\n"
+ example_output = example_match[1]
+ example_prompt_tensor = torch.tensor(encode(example_prompt), dtype=torch.long).unsqueeze(0).to(device)
+ generated_example = decode(model.generate(example_prompt_tensor, max_new_tokens = len(example_output) + 20)[0].tolist())
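+    # the "+ 20" slack lets the model over-generate, so a failure to stop cleanly
+    # (hallucination) can be distinguished from a clean stop when matching below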
+
+ generated_output = generated_example[len(example_prompt):]
+ example_code_generated_output = generated_output.split("\n\n")[0]
+ # if hard match
+ if example_code_generated_output == example_output:
+ hard_match_counter += 1
+ hard_match_successes["example"].append(example)
+ hard_match_successes["all-generated-output"].append(generated_output+"@")
+    # elif soft match (correct prefix but incorrect stopping)
+ elif example_code_generated_output[:len(example_output)] == example_output:
+ soft_match_counter += 1
+ soft_match_successes["example"].append(example)
+ soft_match_successes["all-generated-output"].append(generated_output+"@")
+ # else complete failure
+ else:
+ failures_counter += 1
+ failures["example"].append(example)
+ failures["all-generated-output"].append(generated_output+"@")
+
+ present = time.time()
+
+ log(f"|ITERS: {i+1} / {len(examples)} | COMP: {(i+1)/len(examples) * 100:.2f}% | RATE: {1/(present-past):.2f} it./s | SPD: {present - past :.4f} s/it.| ERT: {convert_seconds((len(examples)-i-1) * (present-past))} |", p_level = 1)
+ log(f"|hard-accuracy: {(hard_match_counter/(i+1))*100:.2f}% | soft-accuracy: {(soft_match_counter/(i+1))*100:.2f}% |", p_level = 2)
+
+ if (i+1) % checkpoint_interval == 0:
+
+ mode, header = ("w",True) if (i+1) == checkpoint_interval else ("a", False)
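+        # the first checkpoint opens the CSVs in write mode (fresh header);
+        # subsequent checkpoints append rows without repeating the header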
+
+ hard_match_successes_df = pd.DataFrame(hard_match_successes)
+ soft_match_successes_df = pd.DataFrame(soft_match_successes)
+ failures_df = pd.DataFrame(failures)
+
+ hard_match_successes_df.index = hard_match_successes_df.index + hard_match_base
+ soft_match_successes_df.index = soft_match_successes_df.index + soft_match_base
+ failures_df.index = failures_df.index + failures_base
+
+ hard_match_base = hard_match_counter
+ soft_match_base = soft_match_counter
+ failures_base = failures_counter
+
+ hard_match_successes_df.to_csv(DIR+"eval-1-atmpt-1-results/eval-1-atmpt-1-hard-match-successes.csv", mode = mode, header = header)
+ soft_match_successes_df.to_csv(DIR+"eval-1-atmpt-1-results/eval-1-atmpt-1-soft-match-successes.csv", mode = mode, header = header)
+ failures_df.to_csv(DIR+"eval-1-atmpt-1-results/eval-1-atmpt-1-failures.csv", mode = mode, header = header)
+
+ hard_match_successes = {"example":[], "all-generated-output":[]}
+ soft_match_successes = {"example":[], "all-generated-output":[]}
+ failures = {"example":[], "all-generated-output":[]}
+
+
+## Logging the metrics
+log("Logging the metrics")
+import neptune
+
+run = neptune.init_run(
+ project="younes-boukacem-workspace/tiny-lm-full-random-mode",
+ api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiJiZGFmNDg2Yy01MzRkLTQwNGMtYmZmMy1hYzM0Y2NkY2QyNmMifQ==",
+ with_id = "IMG1-72",
+ capture_hardware_metrics = False
+)
+run["eval-1/hard-accuracy-percentage"] = (hard_match_counter/len(examples))*100
+run["eval-1/soft-accuracy-percentage"] = (soft_match_counter/len(examples))*100
+
+log_file.close()
\ No newline at end of file
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-6/evals/eval-2/eval-2-atmpt-1-slurm-51645933.out b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-6/evals/eval-2/eval-2-atmpt-1-slurm-51645933.out
new file mode 100644
index 0000000..9e85427
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-6/evals/eval-2/eval-2-atmpt-1-slurm-51645933.out
@@ -0,0 +1,3 @@
+/scratch/yb2618/tiny-lm-full-random-mode/ae-6/eval-2-atmpt-1.py:226: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
+ model.load_state_dict(torch.load(DIR+"best_model_iter435000_epoch0.19_2024-10-02_10-36.pth"))
+slurmstepd: error: *** JOB 51645933 ON gv012 CANCELLED AT 2024-10-04T09:04:11 ***
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-6/evals/eval-2/eval-2-atmpt-1.log b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-6/evals/eval-2/eval-2-atmpt-1.log
new file mode 100644
index 0000000..2dac49e
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-6/evals/eval-2/eval-2-atmpt-1.log
@@ -0,0 +1,20 @@
+|ITERS: 163773 / 20000000 | COMP: 0.82% | RATE: 1.57 it./s | SPD: 0.6359 s/it.| ERT: (145, 23, 35, 49) |
+|hard-accuracy: 29.58% | soft-accuracy: 0.80% |
+
+Imports:
+
+Set the device to GPU if available, otherwise CPU:
+
+Loading the encode and decode functions and setting the vocab_size from the meta object:
+
+Redefining the model of the training:
+
+Loading the model:
+
+Creating and loading the model:
+
+Reading the test data:
+
+Splitting the test data into examples:
+
+Sequential Evaluation loop:
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-6/evals/eval-2/eval-2-atmpt-1.py b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-6/evals/eval-2/eval-2-atmpt-1.py
new file mode 100644
index 0000000..2a83d98
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-6/evals/eval-2/eval-2-atmpt-1.py
@@ -0,0 +1,336 @@
+#@ EVAL-2-ATMPT-1
+#@ We evaluate the 60M model on hodhaifa's generated test.txt from the 200M code snippets dataset
+
+## On Greene
+DIR = "/scratch/yb2618/tiny-lm-full-random-mode/ae-6/"
+DDIR = "/scratch/hb3020/training/60M-model-200M-data/training/"
+DDIR2 = "/scratch/hb3020/training/60M-model-200M-data/"
+
+## Logging boilerplate
+log_file = open(DIR+"eval-2-atmpt-1.log","w")
+pbar_recept_string = " " * 200 + "\n"
+log_file.write(pbar_recept_string)
+log_file.write(pbar_recept_string)
+log_file.flush()
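+# the two writes above reserve two fixed-width 200-character lines at the top of the
+# log file; log(..., p_level=1) and log(..., p_level=2) seek back and overwrite them
+# in place, acting as persistent progress-bar lines, while other messages append at the end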
+def log(s:str, p_level=None):
+ if p_level == 1:
+ log_file.seek(0,0)
+ log_file.write(pbar_recept_string)
+ log_file.seek(0,0)
+ log_file.write(s)
+ log_file.seek(0,2)
+ elif p_level == 2:
+ log_file.seek(len(pbar_recept_string), 0)
+ log_file.write(pbar_recept_string)
+ log_file.seek(len(pbar_recept_string), 0)
+ log_file.write(s)
+ log_file.seek(0,2)
+ else:
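+        # heuristic: a message starting with a capital letter opens a new section;
+        # lowercase messages are logged as "--> " sub-steps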
+ if s[0].upper() == s[0]:
+ start = "\n"
+ end = ":"
+ else:
+ start = " --> "
+ end = ""
+ log_file.write(start + s + end + "\n")
+ log_file.flush()
+
+
+## Convert seconds to days, hours, minutes, seconds
+def convert_seconds(seconds:float):
+ # ignoring the sub seconds
+ seconds = int(seconds)
+ days, seconds = divmod(seconds, 86400)
+ hours, seconds = divmod(seconds, 3600)
+ minutes, seconds = divmod(seconds, 60)
+ return (days, hours, minutes, seconds)
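+# e.g. convert_seconds(90061) -> (1, 1, 1, 1): 1 day, 1 hour, 1 minute, 1 second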
+
+
+## Imports
+log("Imports")
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+## Set the device to GPU if available, otherwise CPU
+log("Set the device to GPU if available, otherwise CPU")
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+print(f"Device set to {device}.")
+
+
+## Loading the encode and decode functions and setting the vocab_size from the meta object
+log("Loading the encode and decode functions and setting the vocab_size from the meta object")
+import pickle
+
+with open(DDIR+"meta.pkl", "rb") as f:
+ meta = pickle.load(f)
+stoi = meta["stoi"]
+itos = meta["itos"]
+
+def encode(s):
+ return [stoi[c] for c in s] # encoder: take a string, output a list of integers
+
+def decode(l):
+ return ''.join([itos[i] for i in l]) # decoder: take a list of integers, output a string
+
+vocab_size = meta["vocab_size"]
+
+
+## Redefining the model of the training
+log("Redefining the model of the training")
+block_size = 256 # Maximum context length
+n_embd = 912 # Embedding dimension
+n_head = 6 # Number of attention heads
+n_layer = 6 # Number of transformer blocks
+dropout = 0 # Dropout rate
+batch_size = 64 # Batch size for training
+
+
+## Loading the model
+log("Loading the model")
+class LayerNorm(nn.Module):
+ """ LayerNorm but with an optional bias. PyTorch doesn't support simply bias=False """
+
+ def __init__(self, ndim, bias):
+ super().__init__()
+ self.weight = nn.Parameter(torch.ones(ndim))
+ self.bias = nn.Parameter(torch.zeros(ndim)) if bias else None
+
+ def forward(self, input):
+ return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+
+class Head(nn.Module):
+ """One head of self-attention."""
+
+ def __init__(self, head_size):
+ super().__init__()
+ self.key = nn.Linear(n_embd, head_size, bias=False)
+ self.query = nn.Linear(n_embd, head_size, bias=False)
+ self.value = nn.Linear(n_embd, head_size, bias=False)
+        self.flash = hasattr(torch.nn.functional, 'scaled_dot_product_attention') # set but unused: forward calls SDPA unconditionally
+
+ self.dropout = nn.Dropout(dropout)
+
+ def forward(self, x):
+ B,T,C = x.shape
+        k = self.key(x) # (B, T, head_size)
+        q = self.query(x) # (B, T, head_size)
+ v = self.value(x)
+
+ out = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=None, dropout_p=dropout if self.training else 0, is_causal=True)
+
+ return out
+
+class MultiHeadAttention(nn.Module):
+ """multiple heads of self-attention in parallel."""
+
+ def __init__(self, num_heads, head_size):
+ super().__init__()
+ self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
+ self.proj = nn.Linear(n_embd, n_embd)
+ self.dropout = nn.Dropout(dropout)
+
+ def forward(self, x):
+ out = torch.cat([h(x) for h in self.heads], dim=-1)
+ out = self.dropout(self.proj(out))
+ return out
+
+class FeedForward(nn.Module):
+ """ a simple linear layer followed by a non-linearity."""
+
+ def __init__(self, n_embd):
+ super().__init__()
+ self.net = nn.Sequential(
+ nn.Linear(n_embd, 4 * n_embd, bias=False),
+ nn.GELU(),
+ nn.Linear( 4 * n_embd, n_embd, bias=False),
+ nn.Dropout(dropout),
+ )
+
+ def forward(self, x):
+ return self.net(x)
+
+class Block(nn.Module):
+ """ Transformer block: communication followed by feedforward."""
+
+ def __init__(self, n_embd, n_head):
+ super().__init__()
+ head_size = n_embd // n_head
+ self.sa = MultiHeadAttention(n_head, head_size)
+ self.ffwd = FeedForward(n_embd)
+ self.ln1 = nn.LayerNorm(n_embd, bias=False)
+ self.ln2 = nn.LayerNorm(n_embd, bias=False)
+
+ def forward(self, x):
+ x = x + self.sa(self.ln1(x))
+ x = x + self.ffwd(self.ln2(x))
+ return x
+
+class GPT(nn.Module):
+
+ def __init__(self):
+ super().__init__()
+ # each token directly reads off the logits for the next token from a lookup table
+ self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
+ self.position_embedding_table = nn.Embedding(block_size, n_embd)
+ self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
+ self.ln_f = nn.LayerNorm(n_embd, bias=False)
+ self.lm_head = nn.Linear(n_embd, vocab_size)
+
+ def forward(self, idx, targets=None):
+ B, T = idx.shape
+
+ # idx and targets are both (B,T) tensor of integers
+ tok_emb = self.token_embedding_table(idx) # (B,T,C)
+ pos_emb = self.position_embedding_table(torch.arange(T, device=device)) # (T,C)
+ x = tok_emb + pos_emb # (B,T,C)
+ x = self.blocks(x) # (B,T,C)
+ x = self.ln_f(x) # (B,T,C)
+ logits = self.lm_head(x) # (B,T,vocab_size)
+
+ if targets is None:
+ loss = None
+ else:
+ B, T, C = logits.shape
+ logits = logits.view(B*T, C)
+ targets = targets.view(B*T)
+ loss = F.cross_entropy(logits, targets)
+
+ return logits, loss
+
+ @torch.no_grad()
+ def generate(self, idx, max_new_tokens):
+ # idx is (B, T) array of indices in the current context
+ for _ in range(max_new_tokens):
+ # crop idx to the last block_size tokens
+ idx_cond = idx[:, -block_size:] # (B, T)
+ # get the predictions
+ logits, loss = self(idx_cond)
+ # focus only on the last time step
+ logits = logits[:, -1, :] # becomes (B, C)
+ # apply softmax to get probabilities
+ probs = F.softmax(logits, dim=-1) # (B, C)
+            # greedy decoding: take the most probable next token
+            _, idx_next = torch.max(probs, dim=1, keepdim=True) # (B, 1)
+            #idx_next = torch.multinomial(probs, num_samples=1) # stochastic sampling alternative
+            # append the chosen index to the running sequence
+ idx = torch.cat((idx, idx_next), dim=1) # (B, T+1)
+ return idx
+
+
+## Creating and loading the model
+log("Creating and loading the model")
+model = GPT()
+model.eval()
+model.load_state_dict(torch.load(DIR+"best_model_iter435000_epoch0.19_2024-10-02_10-36.pth", map_location=device))
+model.to(device)
+
+
+## Reading the test data
+log("Reading the test data")
+with open(DDIR2+"test.txt", "r") as f:
+ test_data = f.read()
+
+
+## Splitting the test data into examples
+log("Splitting the test data into examples")
+examples = test_data.split("\n\n")
+examples = examples[:]  # keep all examples; narrow this slice to sub-sample for quick runs
+
+
+## Sequential Evaluation loop
+log("Sequential Evaluation loop")
+hard_match_counter = 0
+soft_match_counter = 0
+failures_counter = 0
+
+hard_match_successes = {"example":[], "all-generated-output":[]} # correct generated output + correct stopping (no hallucination), i.e. fully correct
+soft_match_successes = {"example":[], "all-generated-output":[]} # correct initial generated output BUT incorrect stopping (hallucination)
+failures = {"example":[], "all-generated-output":[]} # completely incorrect answer
+
+import time
+import pandas as pd
+import os
+
+os.makedirs(DIR+"eval-2-atmpt-1-results/", exist_ok = True)  # makedirs accepts the directory path directly
+
+checkpoint_interval = 5000
+
+hard_match_base = 0
+soft_match_base = 0
+failures_base = 0
+
+for i, example in enumerate(examples):
+
+ past = time.time()
+
+ # generating the output of the model
+ example_match = example.split("# output\n")
+ example_prompt = example_match[0] + "# output\n"
+ example_output = example_match[1]
+ example_prompt_tensor = torch.tensor(encode(example_prompt), dtype=torch.long).unsqueeze(0).to(device)
+ generated_example = decode(model.generate(example_prompt_tensor, max_new_tokens = len(example_output) + 20)[0].tolist())
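+    # the "+ 20" slack lets the model over-generate, so a failure to stop cleanly
+    # (hallucination) can be distinguished from a clean stop when matching below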
+
+ generated_output = generated_example[len(example_prompt):]
+ example_code_generated_output = generated_output.split("\n\n")[0]
+ # if hard match
+ if example_code_generated_output == example_output:
+ hard_match_counter += 1
+ hard_match_successes["example"].append(example)
+ hard_match_successes["all-generated-output"].append(generated_output+"@")
+    # elif soft match (correct prefix but incorrect stopping)
+ elif example_code_generated_output[:len(example_output)] == example_output:
+ soft_match_counter += 1
+ soft_match_successes["example"].append(example)
+ soft_match_successes["all-generated-output"].append(generated_output+"@")
+ # else complete failure
+ else:
+ failures_counter += 1
+ failures["example"].append(example)
+ failures["all-generated-output"].append(generated_output+"@")
+
+ present = time.time()
+
+ log(f"|ITERS: {i+1} / {len(examples)} | COMP: {(i+1)/len(examples) * 100:.2f}% | RATE: {1/(present-past):.2f} it./s | SPD: {present - past :.4f} s/it.| ERT: {convert_seconds((len(examples)-i-1) * (present-past))} |", p_level = 1)
+ log(f"|hard-accuracy: {(hard_match_counter/(i+1))*100:.2f}% | soft-accuracy: {(soft_match_counter/(i+1))*100:.2f}% |", p_level = 2)
+
+ if (i+1) % checkpoint_interval == 0:
+
+ mode, header = ("w",True) if (i+1) == checkpoint_interval else ("a", False)
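+        # the first checkpoint opens the CSVs in write mode (fresh header);
+        # subsequent checkpoints append rows without repeating the header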
+
+ hard_match_successes_df = pd.DataFrame(hard_match_successes)
+ soft_match_successes_df = pd.DataFrame(soft_match_successes)
+ failures_df = pd.DataFrame(failures)
+
+ hard_match_successes_df.index = hard_match_successes_df.index + hard_match_base
+ soft_match_successes_df.index = soft_match_successes_df.index + soft_match_base
+ failures_df.index = failures_df.index + failures_base
+
+ hard_match_base = hard_match_counter
+ soft_match_base = soft_match_counter
+ failures_base = failures_counter
+
+        hard_match_successes_df.to_csv(DIR+"eval-2-atmpt-1-results/eval-2-atmpt-1-hard-match-successes.csv", mode = mode, header = header)
+        soft_match_successes_df.to_csv(DIR+"eval-2-atmpt-1-results/eval-2-atmpt-1-soft-match-successes.csv", mode = mode, header = header)
+        failures_df.to_csv(DIR+"eval-2-atmpt-1-results/eval-2-atmpt-1-failures.csv", mode = mode, header = header)
+
+ hard_match_successes = {"example":[], "all-generated-output":[]}
+ soft_match_successes = {"example":[], "all-generated-output":[]}
+ failures = {"example":[], "all-generated-output":[]}
+
+
+## Logging the metrics
+log("Logging the metrics")
+import neptune
+
+run = neptune.init_run(
+ project="younes-boukacem-workspace/tiny-lm-full-random-mode",
+ api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiJiZGFmNDg2Yy01MzRkLTQwNGMtYmZmMy1hYzM0Y2NkY2QyNmMifQ==",
+ with_id = "IMG1-56",
+ capture_hardware_metrics = False
+)
+run["eval-2/hard-accuracy-percentage"] = (hard_match_counter/len(examples))*100
+run["eval-2/soft-accuracy-percentage"] = (soft_match_counter/len(examples))*100
+
+log_file.close()
\ No newline at end of file
diff --git a/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-6/readme.md b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-6/readme.md
new file mode 100644
index 0000000..cbed5ee
--- /dev/null
+++ b/xperiments/xpgroup-1-codeExecution-outputPrediction/xp-6/readme.md
@@ -0,0 +1,2 @@
+# Description
+We evaluate the latest checkpoint of the 60M model trained by hodhaifa on hodhaifa's test.txt, created from the 200M code snippets dataset.
\ No newline at end of file