From ff32eb263660b8dc883ae8452c7b5ef01c7d713f Mon Sep 17 00:00:00 2001 From: Ashton Doane Date: Tue, 20 Aug 2024 12:13:04 -0700 Subject: [PATCH 1/9] Implemented extract_audio_from_video function. Currently only accepts single file paths --- src/ilabs_streamsync/example_script.py | 54 +++++++++++++------------- src/ilabs_streamsync/streamsync.py | 52 +++++++++++++++++++++++-- 2 files changed, 76 insertions(+), 30 deletions(-) diff --git a/src/ilabs_streamsync/example_script.py b/src/ilabs_streamsync/example_script.py index 710b7ed..b2443fd 100644 --- a/src/ilabs_streamsync/example_script.py +++ b/src/ilabs_streamsync/example_script.py @@ -1,34 +1,36 @@ import mne +from streamsync import StreamSync, extract_audio_from_video -from ilabs_streamsync import StreamSync, extract_audio_from_video +if __name__ == "__main__": + # load an MNE raw file + raw = None + cam1 = "/Users/ashtondoane/VideoSync_NonSubject/sinclair_alexis_audiosync_240110_CAM3.mp4" + flux1 = None + my_events = [] -# load an MNE raw file -raw = None -cam1 = None -flux1 = None -my_events = [] + extract_audio_from_video(cam1, "/Users/ashtondoane/VideoSync_NonSubject/output") -subjects = ["146a", "222b"] + subjects = ["146a", "222b"] -for subj in subjects: - # construct the filename/path - # load the Raw - # figure out where video files are & load them - audio1 = extract_audio_from_video(cam1) + # for subj in subjects: + # construct the filename/path + # load the Raw + # figure out where video files are & load them + # extract_audio_from_video(cam1) - ss = StreamSync(raw, "STIM001") - ss.add_stream(audio1) - ss.add_camera_events(my_events) - ss.add_stream(flux1) - result = ss.do_syncing() - fig = ss.plot_sync() - annot = ss.add_camera_events(my_events) - raw.set_annotations(annot) - fig.savefig(...) 
- if result < 0.7: - write_log_msg(f"subj {subj} had bad pulse syncing, aborting") - continue + # ss = StreamSync(raw, "STIM001") + # ss.add_stream(audio1) + # ss.add_camera_events(my_events) + # ss.add_stream(flux1) + # result = ss.do_syncing() + # fig = ss.plot_sync() + # annot = ss.add_camera_events(my_events) + # raw.set_annotations(annot) + # fig.savefig(...) + # if result < 0.7: + # write_log_msg(f"subj {subj} had bad pulse syncing, aborting") + # continue - # apply maxfilter - # do ICA + # apply maxfilter + # do ICA diff --git a/src/ilabs_streamsync/streamsync.py b/src/ilabs_streamsync/streamsync.py index c8a8552..4d2d2f5 100644 --- a/src/ilabs_streamsync/streamsync.py +++ b/src/ilabs_streamsync/streamsync.py @@ -1,3 +1,9 @@ +from __future__ import annotations + +import os +import subprocess + + class StreamSync: """Synchronize two data streams. @@ -36,12 +42,50 @@ def do_syncing(self): """Synchronize all streams with the reference stream.""" # TODO (waves hands) do the hard part. # TODO spit out a report of correlation/association between all pairs of streams - pass def plot_sync(self): pass -def extract_audio_from_video(path_to_video, channel): - """Path can be a regex or glob to allow batch processing.""" - pass +def extract_audio_from_video(path_to_video, output_dir): + """Extracts audio from path provided. + + path_to_video: str + Path to audio file + TODO allow path_to_video to take regex? + output_dir: str + Path to directory where extracted audio should be sent + + Effects: + Creates output directory if non-existent. For each video found, creates + a file with the associated audio labeled the same way. 
+ + Raises: + Exception if filename is taken in output_dir + """ + audio_codecout = 'pcm_s16le' + audio_suffix = '_16bit' + audio_file = os.path.basename(path_to_video) + audio_suffix + '.wav' + if not os.path.exists(path_to_video): + raise ValueError('Path provided cannot be found.') + if os.path.exists(os.path.join(output_dir, audio_file)): + raise Exception("Audio already exists for " + path_to_video + " in output directory " + output_dir) + + command = ['ffmpeg', + '-acodec', 'pcm_s24le', # force little-endian format (req'd for Linux) + '-i', path_to_video, + '-map', '0:a', # audio only (per DM) +# '-af', 'highpass=f=0.1', + '-acodec', audio_codecout, + '-ac', '2', # no longer mono output, so setting to "2" + '-y', '-vn', # overwrite output file without asking; no video + '-loglevel', 'error', + audio_file] + pipe = subprocess.run(command, timeout=50) + + if pipe.returncode==0: + print('Audio extraction was successful for ' + path_to_video) + output_path = os.path.join(output_dir, audio_file) + os.renames(audio_file, output_path) + else: + print("Audio extraction unsuccessful for " + path_to_video) \ No newline at end of file From a72d108fa104b2dc281cbb7b62162823a86b005b Mon Sep 17 00:00:00 2001 From: Ashton Doane Date: Tue, 20 Aug 2024 12:26:43 -0700 Subject: [PATCH 2/9] Extraction implemented. 
--- src/ilabs_streamsync/example_script.py | 6 +++--- src/ilabs_streamsync/streamsync.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/ilabs_streamsync/example_script.py b/src/ilabs_streamsync/example_script.py index b2443fd..94075d1 100644 --- a/src/ilabs_streamsync/example_script.py +++ b/src/ilabs_streamsync/example_script.py @@ -4,14 +4,14 @@ if __name__ == "__main__": # load an MNE raw file raw = None - cam1 = "/Users/ashtondoane/VideoSync_NonSubject/sinclair_alexis_audiosync_240110_CAM3.mp4" + cam1 = "/Users/user/VideoSync_NonSubject/sinclair_alexis_audiosync_240110_CAM3.mp4" flux1 = None my_events = [] - extract_audio_from_video(cam1, "/Users/ashtondoane/VideoSync_NonSubject/output") + extract_audio_from_video(cam1, "/Users/user/VideoSync_NonSubject/output") - subjects = ["146a", "222b"] + # subjects = ["146a", "222b"] # for subj in subjects: # construct the filename/path diff --git a/src/ilabs_streamsync/streamsync.py b/src/ilabs_streamsync/streamsync.py index 4d2d2f5..455c63f 100644 --- a/src/ilabs_streamsync/streamsync.py +++ b/src/ilabs_streamsync/streamsync.py @@ -17,13 +17,13 @@ class StreamSync: def __init__(self, reference_object, pulse_channel): self.ref_stream = reference_object.get_chan(pulse_channel) self.sfreq = reference_object.info["sfreq"] # Hz - self.streams = [] + self.streams = [] def add_stream(self, stream, channel=None, events=None): """Add a new ``Raw`` or video stream, optionally with events. - stream : Raw | wav - An audio or FIF stream. + stream : str + File path to an audio or FIF stream. channel : str | int | None Which channel of `stream` contains the sync pulse sequence. events : array-like | None @@ -46,7 +46,6 @@ def do_syncing(self): def plot_sync(self): pass - def extract_audio_from_video(path_to_video, output_dir): """Extracts audio from path provided. @@ -61,6 +60,7 @@ def extract_audio_from_video(path_to_video, output_dir): a file with the associated audio labeled the same way. 
+ + Raises: + ValueError if video path does not exist, Exception if filename is taken in output_dir """ audio_codecout = 'pcm_s16le' From ff806592fcac5960f1d66d5b71211a4be9cfe56a Mon Sep 17 00:00:00 2001 From: Ashton Doane Date: Tue, 20 Aug 2024 13:14:44 -0700 Subject: [PATCH 3/9] Implemented add_stream for wav files. --- src/ilabs_streamsync/example_script.py | 8 ++-- src/ilabs_streamsync/streamsync.py | 55 +++++++++++++++++++------- 2 files changed, 46 insertions(+), 17 deletions(-) diff --git a/src/ilabs_streamsync/example_script.py b/src/ilabs_streamsync/example_script.py index 94075d1..b1518f6 100644 --- a/src/ilabs_streamsync/example_script.py +++ b/src/ilabs_streamsync/example_script.py @@ -4,12 +4,14 @@ if __name__ == "__main__": # load an MNE raw file raw = None - cam1 = "/Users/user/VideoSync_NonSubject/sinclair_alexis_audiosync_240110_CAM3.mp4" + # cam1 = "/Users/ashtondoane/VideoSync_NonSubject/sinclair_alexis_audiosync_240110_CAM3.mp4" flux1 = None my_events = [] - extract_audio_from_video(cam1, "/Users/user/VideoSync_NonSubject/output") - + # extract_audio_from_video(cam1, "/Users/ashtondoane/VideoSync_NonSubject/output") + ss = StreamSync(None, None) + ss.add_stream("/Users/ashtondoane/VideoSync_NonSubject/output/sinclair_alexis_audiosync_240110_CAM3_16bit.wav", channel=1) + ss.plot_sync_pulses(tmin=0.998,tmax=1) # subjects = ["146a", "222b"] diff --git a/src/ilabs_streamsync/streamsync.py b/src/ilabs_streamsync/streamsync.py index 455c63f..f57af6d 100644 --- a/src/ilabs_streamsync/streamsync.py +++ b/src/ilabs_streamsync/streamsync.py @@ -3,6 +3,10 @@ import os import subprocess +import matplotlib.pyplot as plt +import numpy as np +from scipy.io.wavfile import read as wavread + class StreamSync: """Synchronize two data streams.
@@ -15,13 +19,14 @@ class StreamSync: """ def __init__(self, reference_object, pulse_channel): - self.ref_stream = reference_object.get_chan(pulse_channel) - self.sfreq = reference_object.info["sfreq"] # Hz - self.streams = [] + # self.ref_stream = reference_object.get_chan(pulse_channel) + self.ref_stream = None + # self.sfreq = reference_object.info["sfreq"] # Hz + self.sfreq = 0 + self.streams = [] # of (filename, srate, Pulses, Data) def add_stream(self, stream, channel=None, events=None): """Add a new ``Raw`` or video stream, optionally with events. - stream : str File path to an audio or FIF stream. channel : str | int | None @@ -30,21 +35,43 @@ def add_stream(self, stream, channel=None, events=None): Events associated with the stream. TODO: should they be integer sample numbers? Timestamps? Do we support both? """ - pulses = self._extract_pulse_sequence_from_stream(stream, channel=channel) - self.streams.append(pulses) - - def _extract_pulse_sequence_from_stream(self, stream, channel): - # TODO triage based on input type (e.g., if it's a Raw, pull out a stim chan, - # if it's audio, just add it as-is) + srate, pulses, data = self._extract_data_from_stream(stream, channel=channel) + self.streams.append((stream, srate, pulses, data)) + + def _extract_data_from_stream(self, stream, channel): + """Extracts pulses and raw data from stream provided.""" + ext = os.path.splitext(stream)[1] + if ext == ".fif": + return self._extract_data__from_raw(stream, channel) + if ext == ".wav": + return self._extract_data_from_wav(stream, channel) + raise TypeError("Stream provided was of unsupported format. Please provide a fif or wav file.") + + + def _extract_data__from_raw(self, stream, channel): pass + def _extract_data_from_wav(self, stream, channel): + "Returns tuple of (pulse channel, audio channel) from stereo file." 
+ srate, wav_signal = wavread(stream) + return (srate, wav_signal[:,channel], wav_signal[:,1-channel]) + def do_syncing(self): """Synchronize all streams with the reference stream.""" # TODO (waves hands) do the hard part. # TODO spit out a report of correlation/association between all pairs of streams - def plot_sync(self): - pass + def plot_sync_pulses(self, tmin=0, tmax=float('inf')): + # TODO Plot the raw file on the first plot. + fig, axset = plt.subplots(len(self.streams)+1, 1, figsize = [8,6]) #show individual channels seperately, and the 0th plot is the combination of these. + for i, stream in enumerate(self.streams): + npts = len(stream[2]) + tt = np.arange(npts) / stream[1] + idx = np.where((tt>=tmin) & (tt Date: Tue, 20 Aug 2024 13:30:50 -0700 Subject: [PATCH 4/9] Changed to using pathlib --- src/ilabs_streamsync/example_script.py | 8 ++++---- src/ilabs_streamsync/streamsync.py | 20 +++++++++++--------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/ilabs_streamsync/example_script.py b/src/ilabs_streamsync/example_script.py index b1518f6..e38f35e 100644 --- a/src/ilabs_streamsync/example_script.py +++ b/src/ilabs_streamsync/example_script.py @@ -4,13 +4,13 @@ if __name__ == "__main__": # load an MNE raw file raw = None - # cam1 = "/Users/ashtondoane/VideoSync_NonSubject/sinclair_alexis_audiosync_240110_CAM3.mp4" + cam1 = "/Users/user/VideoSync_NonSubject/sinclair_alexis_audiosync_240110_CAM3.mp4" flux1 = None my_events = [] - # extract_audio_from_video(cam1, "/Users/ashtondoane/VideoSync_NonSubject/output") - ss = StreamSync(None, None) - ss.add_stream("/Users/ashtondoane/VideoSync_NonSubject/output/sinclair_alexis_audiosync_240110_CAM3_16bit.wav", channel=1) + extract_audio_from_video(cam1, "/Users/user/VideoSync_NonSubject/output") + ss = StreamSync(None, None) #Raw type not supported yet + ss.add_stream("/Users/user/VideoSync_NonSubject/output/sinclair_alexis_audiosync_240110_CAM3_16bit.wav", channel=1) 
ss.plot_sync_pulses(tmin=0.998,tmax=1) # subjects = ["146a", "222b"] diff --git a/src/ilabs_streamsync/streamsync.py b/src/ilabs_streamsync/streamsync.py index f57af6d..16bdf55 100644 --- a/src/ilabs_streamsync/streamsync.py +++ b/src/ilabs_streamsync/streamsync.py @@ -1,6 +1,7 @@ from __future__ import annotations import os +import pathlib import subprocess import matplotlib.pyplot as plt @@ -40,7 +41,7 @@ def add_stream(self, stream, channel=None, events=None): def _extract_data_from_stream(self, stream, channel): """Extracts pulses and raw data from stream provided.""" - ext = os.path.splitext(stream)[1] + ext = pathlib.Path(stream).suffix if ext == ".fif": return self._extract_data__from_raw(stream, channel) if ext == ".wav": @@ -69,7 +70,7 @@ def plot_sync_pulses(self, tmin=0, tmax=float('inf')): tt = np.arange(npts) / stream[1] idx = np.where((tt>=tmin) & (tt Date: Tue, 20 Aug 2024 13:45:31 -0700 Subject: [PATCH 5/9] Fixed docstring --- src/ilabs_streamsync/streamsync.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/ilabs_streamsync/streamsync.py b/src/ilabs_streamsync/streamsync.py index 16bdf55..f11020c 100644 --- a/src/ilabs_streamsync/streamsync.py +++ b/src/ilabs_streamsync/streamsync.py @@ -20,6 +20,7 @@ class StreamSync: """ def __init__(self, reference_object, pulse_channel): + """Initialize StreamSync object with 'Raw' MEG associated with it.""" # self.ref_stream = reference_object.get_chan(pulse_channel) self.ref_stream = None # self.sfreq = reference_object.info["sfreq"] # Hz @@ -28,6 +29,7 @@ def __init__(self, reference_object, pulse_channel): def add_stream(self, stream, channel=None, events=None): """Add a new ``Raw`` or video stream, optionally with events. + stream : str File path to an audio or FIF stream. 
channel : str | int | None @@ -40,7 +42,7 @@ def add_stream(self, stream, channel=None, events=None): self.streams.append((stream, srate, pulses, data)) def _extract_data_from_stream(self, stream, channel): - """Extracts pulses and raw data from stream provided.""" + """Extract pulses and raw data from stream provided.""" ext = pathlib.Path(stream).suffix if ext == ".fif": return self._extract_data__from_raw(stream, channel) @@ -53,7 +55,7 @@ def _extract_data__from_raw(self, stream, channel): pass def _extract_data_from_wav(self, stream, channel): - "Returns tuple of (pulse channel, audio channel) from stereo file." + """Return tuple of (pulse channel, audio channel) from stereo file.""" srate, wav_signal = wavread(stream) return (srate, wav_signal[:,channel], wav_signal[:,1-channel]) @@ -63,6 +65,7 @@ def do_syncing(self): # TODO spit out a report of correlation/association between all pairs of streams def plot_sync_pulses(self, tmin=0, tmax=float('inf')): + """Plot each stream in the class.""" # TODO Plot the raw file on the first plot. fig, axset = plt.subplots(len(self.streams)+1, 1, figsize = [8,6]) #show individual channels seperately, and the 0th plot is the combination of these. for i, stream in enumerate(self.streams): @@ -75,7 +78,7 @@ def plot_sync_pulses(self, tmin=0, tmax=float('inf')): plt.show() def extract_audio_from_video(path_to_video, output_dir): - """Extracts audio from path provided. + """Extract audio from path provided. path_to_video: str Path to audio file From b1235c89ba51dfa996cec58d7675c11084f3f8dc Mon Sep 17 00:00:00 2001 From: ashtondoane <62417662+ashtondoane@users.noreply.github.com> Date: Fri, 6 Sep 2024 23:49:02 -0700 Subject: [PATCH 6/9] Corrected typo. 
Co-authored-by: Daniel McCloy --- src/ilabs_streamsync/streamsync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ilabs_streamsync/streamsync.py b/src/ilabs_streamsync/streamsync.py index f11020c..eae26e6 100644 --- a/src/ilabs_streamsync/streamsync.py +++ b/src/ilabs_streamsync/streamsync.py @@ -45,7 +45,7 @@ def _extract_data_from_stream(self, stream, channel): """Extract pulses and raw data from stream provided.""" ext = pathlib.Path(stream).suffix if ext == ".fif": - return self._extract_data__from_raw(stream, channel) + return self._extract_data_from_raw(stream, channel) if ext == ".wav": return self._extract_data_from_wav(stream, channel) raise TypeError("Stream provided was of unsupported format. Please provide a fif or wav file.") From c1ddcd8f817a1339328155f096aaf19fa2c2bba0 Mon Sep 17 00:00:00 2001 From: Ashton Doane Date: Tue, 10 Sep 2024 11:35:29 +0200 Subject: [PATCH 7/9] Implemented MNE file upload. Corrected subprocess to immediately place extracted file in correct location. 
--- src/ilabs_streamsync/example_script.py | 18 +++++--- src/ilabs_streamsync/streamsync.py | 63 +++++++++++++++++++------- 2 files changed, 59 insertions(+), 22 deletions(-) diff --git a/src/ilabs_streamsync/example_script.py b/src/ilabs_streamsync/example_script.py index e38f35e..94909e4 100644 --- a/src/ilabs_streamsync/example_script.py +++ b/src/ilabs_streamsync/example_script.py @@ -1,17 +1,23 @@ +from __future__ import annotations + import mne from streamsync import StreamSync, extract_audio_from_video if __name__ == "__main__": # load an MNE raw file - raw = None - cam1 = "/Users/user/VideoSync_NonSubject/sinclair_alexis_audiosync_240110_CAM3.mp4" + raw = "/Users/user/VideoSync_NonSubject/sinclair_alexis_audiosync_240110_raw.fif" + channel = "STI011" + cams = ["/Users/user/VideoSync_NonSubject/sinclair_alexis_audiosync_240110_CAM3.mp4"] + output_dir = "/Users/user/VideoSync_NonSubject/output" flux1 = None my_events = [] - extract_audio_from_video(cam1, "/Users/user/VideoSync_NonSubject/output") - ss = StreamSync(None, None) #Raw type not supported yet - ss.add_stream("/Users/user/VideoSync_NonSubject/output/sinclair_alexis_audiosync_240110_CAM3_16bit.wav", channel=1) - ss.plot_sync_pulses(tmin=0.998,tmax=1) + for cam in cams: + extract_audio_from_video(cam, output_dir) + ss = StreamSync(raw, channel) + + # ss.add_stream("/Users/ashtondoane/VideoSync_NonSubject/output/sinclair_alexis_audiosync_240110_CAM3_16bit.wav", channel=1) + # ss.plot_sync_pulses(tmin=0.998,tmax=1) # subjects = ["146a", "222b"] diff --git a/src/ilabs_streamsync/streamsync.py b/src/ilabs_streamsync/streamsync.py index f11020c..82583bb 100644 --- a/src/ilabs_streamsync/streamsync.py +++ b/src/ilabs_streamsync/streamsync.py @@ -1,13 +1,17 @@ + from __future__ import annotations import os import pathlib import subprocess +import logger import matplotlib.pyplot as plt +import mne import numpy as np from scipy.io.wavfile import read as wavread +FFMPEG_TIMEOUT_SEC = 50 class StreamSync: 
"""Synchronize two data streams. @@ -20,12 +24,41 @@ class StreamSync: """ def __init__(self, reference_object, pulse_channel): - """Initialize StreamSync object with 'Raw' MEG associated with it.""" - # self.ref_stream = reference_object.get_chan(pulse_channel) - self.ref_stream = None - # self.sfreq = reference_object.info["sfreq"] # Hz - self.sfreq = 0 - self.streams = [] # of (filename, srate, Pulses, Data) + """Initialize StreamSync object with 'Raw' MEG associated with it. + + reference_object: str TODO: is str the best method for this, or should this be pathlib obj? + File path to an MEG raw file with fif formatting. TODO: Verify fif only? + pulse_channel: str + A string associated with the stim channel name. + """ + # Check provided reference_object for type and existence. + if not reference_object: + raise TypeError("reference_object is None. Please provide reference_object of type str.") + if type(reference_object) is not str: + raise TypeError("reference_object must be a file path of type str.") + ref_path_obj = pathlib.Path(reference_object) + if not ref_path_obj.exists(): + raise OSError("reference_object file path does not exist.") + if not ref_path_obj.suffix == ".fif": + raise ValueError("Provided reference object is not of type .fif") + + # Load in raw file if valid + raw = mne.io.read_raw_fif(reference_object, preload=False, allow_maxshield=True) + + #Check type and value of pulse_channel, and ensure reference object has such a channel. + if not pulse_channel: + raise TypeError("pulse_channel is None. 
Please provide pulse_chanel parameter of type int.") + if type(pulse_channel) is not str: + raise TypeError("pulse_chanel parameter must be of type str.") + if raw[pulse_channel] is None: + raise ValueError('pulse_channel does not exist in refrence_object.') + + + self.raw = mne.io.read_raw_fif(reference_object, preload=False, allow_maxshield=True) + self.ref_stream = raw[pulse_channel] + self.sfreq = self.raw.info["sfreq"] # Hz + + self.streams = [] # of (filename, srate, Pulses, Data) def add_stream(self, stream, channel=None, events=None): """Add a new ``Raw`` or video stream, optionally with events. @@ -44,15 +77,10 @@ def add_stream(self, stream, channel=None, events=None): def _extract_data_from_stream(self, stream, channel): """Extract pulses and raw data from stream provided.""" ext = pathlib.Path(stream).suffix - if ext == ".fif": - return self._extract_data__from_raw(stream, channel) if ext == ".wav": return self._extract_data_from_wav(stream, channel) - raise TypeError("Stream provided was of unsupported format. Please provide a fif or wav file.") - + raise TypeError("Stream provided was of unsupported format. Please provide a wav file.") - def _extract_data__from_raw(self, stream, channel): - pass def _extract_data_from_wav(self, stream, channel): """Return tuple of (pulse channel, audio channel) from stereo file.""" srate, wav_signal = wavread(stream) return (srate, wav_signal[:,channel], wav_signal[:,1-channel]) @@ -102,6 +130,11 @@ def extract_audio_from_video(path_to_video, output_dir): raise ValueError('Path provided cannot be found.') if pathlib.PurePath.joinpath(pathlib.Path(output_dir), pathlib.Path(audio_file)).exists(): raise Exception(f"Audio already exists for {path_to_video} in output directory.") + + # Create output directory if non-existent.
+ od = pathlib.Path(output_dir) + od.mkdir(exist_ok=True, parents=True) + output_path = output_dir + "/" + audio_file command = ['ffmpeg', '-acodec', 'pcm_s24le', # force little-endian format (req'd for Linux) @@ -112,12 +145,10 @@ def extract_audio_from_video(path_to_video, output_dir): '-ac', '2', # no longer mono output, so setting to "2" '-y', '-vn', # overwrite output file without asking; no video '-loglevel', 'error', - audio_file] - pipe = subprocess.run(command, timeout=50, check=False) + output_path] + pipe = subprocess.run(command, timeout=FFMPEG_TIMEOUT_SEC, check=False) if pipe.returncode==0: print(f'Audio extraction was successful for {path_to_video}') - output_path = pathlib.PurePath.joinpath(pathlib.Path(output_dir), pathlib.Path(audio_file)) - os.renames(audio_file, output_path) else: print(f"Audio extraction unsuccessful for {path_to_video}") \ No newline at end of file From 32648c6377d25ed2963112c1cd492f97a15d1bbb Mon Sep 17 00:00:00 2001 From: Ashton Doane Date: Wed, 11 Sep 2024 18:44:16 +0200 Subject: [PATCH 8/9] Corrected errors mentioned in prior PR. Initializes SS object with reference MEG. Implemented display of all pulse channels (could be updated to be more user friendly). --- src/ilabs_streamsync/example_script.py | 5 ++-- src/ilabs_streamsync/streamsync.py | 35 +++++++++++++++++++------- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/src/ilabs_streamsync/example_script.py b/src/ilabs_streamsync/example_script.py index 94909e4..12bf205 100644 --- a/src/ilabs_streamsync/example_script.py +++ b/src/ilabs_streamsync/example_script.py @@ -16,8 +16,9 @@ extract_audio_from_video(cam, output_dir) ss = StreamSync(raw, channel) - # ss.add_stream("/Users/ashtondoane/VideoSync_NonSubject/output/sinclair_alexis_audiosync_240110_CAM3_16bit.wav", channel=1) - # ss.plot_sync_pulses(tmin=0.998,tmax=1) + # TODO: Perhaps the extraction above could return the newly created paths so that this doesn't need to be hard coded.
+ ss.add_stream("/Users/user/VideoSync_NonSubject/output/sinclair_alexis_audiosync_240110_CAM3_16bit.wav", channel=1) + ss.plot_sync_pulses(tmin=0.5,tmax=50) # subjects = ["146a", "222b"] diff --git a/src/ilabs_streamsync/streamsync.py b/src/ilabs_streamsync/streamsync.py index 82583bb..7d423ab 100644 --- a/src/ilabs_streamsync/streamsync.py +++ b/src/ilabs_streamsync/streamsync.py @@ -1,11 +1,11 @@ from __future__ import annotations +import logging import os import pathlib import subprocess -import logger import matplotlib.pyplot as plt import mne import numpy as np @@ -33,21 +33,21 @@ def __init__(self, reference_object, pulse_channel): """ # Check provided reference_object for type and existence. if not reference_object: - raise TypeError("reference_object is None. Please provide reference_object of type str.") + raise TypeError("reference_object is None. Please provide a path.") if type(reference_object) is not str: raise TypeError("reference_object must be a file path of type str.") ref_path_obj = pathlib.Path(reference_object) if not ref_path_obj.exists(): raise OSError("reference_object file path does not exist.") if not ref_path_obj.suffix == ".fif": - raise ValueError("Provided reference object is not of type .fif") + raise ValueError("Provided reference object does not point to a .fif file.") # Load in raw file if valid raw = mne.io.read_raw_fif(reference_object, preload=False, allow_maxshield=True) #Check type and value of pulse_channel, and ensure reference object has such a channel. if not pulse_channel: - raise TypeError("pulse_channel is None. Please provide pulse_chanel parameter of type int.") + raise TypeError("pulse_channel is None. 
Please provide a channel name of type str.") if type(pulse_channel) is not str: raise TypeError("pulse_chanel parameter must be of type str.") if raw[pulse_channel] is None: @@ -56,6 +56,7 @@ def __init__(self, reference_object, pulse_channel): self.raw = mne.io.read_raw_fif(reference_object, preload=False, allow_maxshield=True) self.ref_stream = raw[pulse_channel] + self.sfreq = self.raw.info["sfreq"] # Hz self.streams = [] # of (filename, srate, Pulses, Data) @@ -75,7 +76,7 @@ def add_stream(self, stream, channel=None, events=None): self.streams.append((stream, srate, pulses, data)) def _extract_data_from_stream(self, stream, channel): - """Extract pulses and raw data from stream provided.""" + """Extract pulses and raw data from stream provided. TODO: Implement adding a annotation stream.""" ext = pathlib.Path(stream).suffix if ext == ".wav": return self._extract_data_from_wav(stream, channel) @@ -87,15 +88,30 @@ def _extract_data_from_wav(self, stream, channel): srate, wav_signal = wavread(stream) return (srate, wav_signal[:,channel], wav_signal[:,1-channel]) + def remove_stream(self, stream): + pass + def do_syncing(self): """Synchronize all streams with the reference stream.""" # TODO (waves hands) do the hard part. # TODO spit out a report of correlation/association between all pairs of streams def plot_sync_pulses(self, tmin=0, tmax=float('inf')): - """Plot each stream in the class.""" - # TODO Plot the raw file on the first plot. + """Plot each stream in the class. + + tmin: int + Minimum timestamp to be graphed. + tmax: int + Maximum timestamp to be graphed. + """ fig, axset = plt.subplots(len(self.streams)+1, 1, figsize = [8,6]) #show individual channels seperately, and the 0th plot is the combination of these. 
+ # Plot reference_object + trig, tt_trig = self.ref_stream + trig = trig.reshape(tt_trig.shape) + idx = np.where((tt_trig>=tmin) & (tt_trig Date: Thu, 12 Sep 2024 10:18:39 +0200 Subject: [PATCH 9/9] Fixed final PR comments --- src/ilabs_streamsync/example_script.py | 3 +-- src/ilabs_streamsync/streamdata.py | 23 +++++++++++++++++++++ src/ilabs_streamsync/streamsync.py | 28 ++++++++++++-------------- 3 files changed, 37 insertions(+), 17 deletions(-) create mode 100644 src/ilabs_streamsync/streamdata.py diff --git a/src/ilabs_streamsync/example_script.py b/src/ilabs_streamsync/example_script.py index 12bf205..e006f72 100644 --- a/src/ilabs_streamsync/example_script.py +++ b/src/ilabs_streamsync/example_script.py @@ -13,10 +13,9 @@ my_events = [] for cam in cams: - extract_audio_from_video(cam, output_dir) + extract_audio_from_video(cam, output_dir, overwrite=False) #This could potentially return filenames to avoid the hardcoding seen below. ss = StreamSync(raw, channel) - # TODO: Perhaps the extraction above could return the newly created paths so that this doesn't need to be hard coded. ss.add_stream("/Users/user/VideoSync_NonSubject/output/sinclair_alexis_audiosync_240110_CAM3_16bit.wav", channel=1) ss.plot_sync_pulses(tmin=0.5,tmax=50) diff --git a/src/ilabs_streamsync/streamdata.py b/src/ilabs_streamsync/streamdata.py new file mode 100644 index 0000000..9a30fcf --- /dev/null +++ b/src/ilabs_streamsync/streamdata.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +class StreamData: + """ + Store information about stream of data. + """ + def __init__(self, filename, sample_rate, pulses, data): + """ + Initialize object with associated properties. + + filename: str + Path to the file with stream data + sample_rate: int + Sampling rate of the data + pulses: np.array + Numpy array representing the pulses. + data: np.array + NumPy array representing all streams of data. 
+ """ + self.filename = filename + self.sample_rate = sample_rate + self.pulses = pulses + self.data = data \ No newline at end of file diff --git a/src/ilabs_streamsync/streamsync.py b/src/ilabs_streamsync/streamsync.py index 7d423ab..f967202 100644 --- a/src/ilabs_streamsync/streamsync.py +++ b/src/ilabs_streamsync/streamsync.py @@ -10,6 +10,7 @@ import mne import numpy as np from scipy.io.wavfile import read as wavread +from streamdata import StreamData FFMPEG_TIMEOUT_SEC = 50 @@ -59,7 +60,7 @@ def __init__(self, reference_object, pulse_channel): self.sfreq = self.raw.info["sfreq"] # Hz - self.streams = [] # of (filename, srate, Pulses, Data) + self.streams = [] # list of StreamData objects def add_stream(self, stream, channel=None, events=None): """Add a new ``Raw`` or video stream, optionally with events. @@ -72,8 +73,7 @@ def add_stream(self, stream, channel=None, events=None): Events associated with the stream. TODO: should they be integer sample numbers? Timestamps? Do we support both? """ - srate, pulses, data = self._extract_data_from_stream(stream, channel=channel) - self.streams.append((stream, srate, pulses, data)) + self.streams.append(self._extract_data_from_stream(stream, channel=channel)) def _extract_data_from_stream(self, stream, channel): """Extract pulses and raw data from stream provided. TODO: Implement adding a annotation stream.""" @@ -86,7 +86,7 @@ def _extract_data_from_stream(self, stream, channel): def _extract_data_from_wav(self, stream, channel): """Return tuple of (pulse channel, audio channel) from stereo file.""" srate, wav_signal = wavread(stream) - return (srate, wav_signal[:,channel], wav_signal[:,1-channel]) + return StreamData(filename = stream, sample_rate=srate, pulses=wav_signal[:,channel], data=wav_signal[:,1-channel]) def remove_stream(self, stream): pass @@ -96,7 +96,7 @@ def do_syncing(self): # TODO (waves hands) do the hard part. 
# TODO spit out a report of correlation/association between all pairs of streams - def plot_sync_pulses(self, tmin=0, tmax=float('inf')): + def plot_sync_pulses(self, tmin=0, tmax=None): """Plot each stream in the class. tmin: int @@ -113,15 +113,15 @@ def plot_sync_pulses(self, tmin=0, tmax=float('inf')): axset[0].set_title("Reference MEG") # Plot all other streams for i, stream in enumerate(self.streams): - npts = len(stream[2]) - tt = np.arange(npts) / stream[1] + npts = len(stream.pulses) + tt = np.arange(npts) / stream.sample_rate idx = np.where((tt>=tmin) & (tt