Changes from all commits
38 commits
5abfca1
Add files via upload
cwusinich Feb 27, 2020
164a1b7
Update Final_Project_Outline_CWusinich.md
cwusinich Feb 27, 2020
9dbcbc0
Update Final_Project_Outline_CWusinich.md
cwusinich Feb 27, 2020
1ed59bc
Update README.md
cwusinich Feb 27, 2020
d3797d1
Delete Final_Project_Outline_CWusinich.md
cwusinich Feb 28, 2020
74026d3
uploading scripts so far
Apr 23, 2020
9de7eb4
Uploading scripts to git
Apr 23, 2020
99f001f
Trying to sync this with gitgit pull origin master -m message
Apr 23, 2020
5c6fe45
still trying to make this upload
Apr 23, 2020
99d34f1
Merge branch 'master' of https://github.com/cwusinich/project_spring_…
Apr 23, 2020
c811c0a
is this working...
Apr 23, 2020
e9e0f76
is this working...
Apr 23, 2020
aa69691
Edited readme, renamed behavior data processing script draft (it is a…
May 5, 2020
8d37754
Updated setup.py
cwusinich May 6, 2020
e1fc6e1
Updated readme
cwusinich May 6, 2020
b43d960
Updated setup file and renamed a few things
May 6, 2020
85566e4
Turned some old scripts into functions and combined some functions in…
May 6, 2020
d4ec2f3
Gave package a cuter name
May 6, 2020
ce78f38
updated readme with function names and collapsed script names
cwusinich May 7, 2020
2d16aa3
more minor updates
May 7, 2020
20b5ff3
edits to tests
May 7, 2020
a87295a
A working test appears
cwusinich May 7, 2020
04bf814
Edited .gitignore
cwusinich May 7, 2020
a2e43b6
Removed .files
cwusinich May 7, 2020
2f6285b
Added a second functional test; need to figure out how to supress ext…
cwusinich May 7, 2020
debbc3a
rename and improve tests file
leej3 May 7, 2020
4e08edd
make subject directory a sub directory
leej3 May 7, 2020
dfca453
Merge pull request #1 from leej3/make_some_fixes
cwusinich May 7, 2020
f904ba9
A few edits to the tests
cwusinich May 7, 2020
c80ca16
added more fake data making capability to test setup function; workin…
cwusinich May 8, 2020
b57891d
added 3 more tests; removed default args that were variables from all…
cwusinich May 8, 2020
7b6ae2b
updated .gitignore
cwusinich May 8, 2020
22af7eb
removed file clutter
cwusinich May 8, 2020
df84c6f
removed more clutter
cwusinich May 8, 2020
4f39821
updated README.md
cwusinich May 8, 2020
aba1874
edited version number
cwusinich May 8, 2020
82e3122
updated README.md yet again
cwusinich May 8, 2020
b3cada0
updated README.md one last time
cwusinich May 8, 2020
4 changes: 4 additions & 0 deletions .gitignore
@@ -1 +1,5 @@
__pycache__
.DS_Store
.Rhistory
.py.swp
MID_proc.egg-info
13 changes: 1 addition & 12 deletions LICENSE
100644 → 100755
Expand Up @@ -175,18 +175,7 @@

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright [yyyy] [name of copyright owner]
Copyright 2020 Christina Wusinich

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
130 changes: 130 additions & 0 deletions MID_proc/MID_beh.py
@@ -0,0 +1,130 @@
#CW 5.8.2020
#outputs of these functions include marker files (for marker placement in MEG file processing) and behavioral data results (appended to a master spreadsheet)

import sys
import pandas as pd


##############################
#STEP 1: MAKE MARKER TXT FILES
#MEG preprocessing step 1 must be run first so that the cue files exist
def make_markerfiles_MID(subjectlist,subdir_base):
    '''Makes marker text files for each subject in the input list and puts them in the subject's meg folder.'''
    with open(subjectlist,'r') as sublist:
        proclist=[line.strip() for line in sublist]
    #loop through all subjects in the input list begins here!
    for subject in proclist:
        subdir=f'{subdir_base}/sub-{subject}'
        #pull data from the cue_marks file
        stimtime=pd.read_csv(subdir + '/meg/cue_marks',delimiter=' ',names=['Onset','Time'])

        #read the MID behavior file
        data_read=pd.read_csv(subdir + '/behavior/MID1-' + subject + '-1_behavior.txt',delimiter='\t',skiprows=1)

        #combine the cue column and the stim times into one frame (includes all trial types)
        trial=data_read[['Cue']]
        frames=[trial,stimtime]
        stimtimes_full=pd.concat(frames,axis=1)

        #from stimtimes_full, pull out the win stim times into a matrix (df_win)
        stimtimes_win=stimtimes_full[stimtimes_full.Cue=='Win2']
        df_win=stimtimes_win[stimtimes_win.columns[1:3]]

        #repeat the above for lose stims
        stimtimes_lose=stimtimes_full[stimtimes_full.Cue=='Lose2']
        df_lose=stimtimes_lose[stimtimes_lose.columns[1:3]]

        #repeat the above for control stims
        stimtimes_cont=stimtimes_full[stimtimes_full.Cue=='Control']
        df_cont=stimtimes_cont[stimtimes_cont.columns[1:3]]

        #write the win, lose, and control stim time matrices each to its own text file in the subject's meg folder; these files are used in MEG preprocessing step 2 to add trial type stim markers
        df_win.to_csv(subdir + '/meg/' + subject + '_win.txt',index=False,header=False,sep=' ')
        df_lose.to_csv(subdir + '/meg/' + subject + '_lose.txt',index=False,header=False,sep=' ')
        df_cont.to_csv(subdir + '/meg/' + subject + '_cont.txt',index=False,header=False,sep=' ')


#####################################################
#STEP 2: CLEAN MID BEHAVIORAL DATA AND OUTPUT DESCRIPTIVES
def clean_beh_MID(subjectlist,subdir_base,outputdir):
    '''Makes a cleaned behavior file and a descriptives file (mean reaction time and accuracy by win/lose/control trial type) for each subject, puts them in the subject's behavior folder, and appends the collapsed data to a group behavior file.'''
    with open(subjectlist,'r') as sublist:
        processinglist=[line.strip() for line in sublist]
    #initiate the aggregate list for later
    df_all=[]
    #loop through all subjects in the input list begins here!
    for subject in processinglist:
        subdir=f'{subdir_base}/sub-{subject}'
        #read in the subject's behavior data file
        data_read=pd.read_csv(subdir + '/behavior/MID1-' + subject + '-1_behavior.txt',delimiter='\t',skiprows=1)
        behavior=data_read[['Cue','ResponseType','Target.RT']]

        #assign win/lose/control hits and misses to variables respectively
        behavior_win=behavior[behavior.Cue=='Win2']
        behavior_win_hit=behavior_win[behavior_win.ResponseType=='GoodResponse']
        behavior_win_miss=behavior_win[behavior_win.ResponseType=='NoResponse']

        behavior_lose=behavior[behavior.Cue=='Lose2']
        behavior_lose_hit=behavior_lose[behavior_lose.ResponseType=='GoodResponse']
        behavior_lose_miss=behavior_lose[behavior_lose.ResponseType=='NoResponse']

        behavior_cont=behavior[behavior.Cue=='Control']
        behavior_cont_hit=behavior_cont[behavior_cont.ResponseType=='GoodResponse']
        behavior_cont_miss=behavior_cont[behavior_cont.ResponseType=='NoResponse']

        #calculate mean reaction times (excluding zero RTs, i.e. no response) and trial counts for each trial type and outcome
        win_hit_RT=behavior_win_hit['Target.RT'][behavior_win_hit['Target.RT']!=0].mean()
        win_hit_acc=behavior_win_hit.count()
        win_miss_RT=behavior_win_miss['Target.RT'][behavior_win_miss['Target.RT']!=0].mean()
        win_miss_acc=behavior_win_miss.count()
        lose_hit_RT=behavior_lose_hit['Target.RT'][behavior_lose_hit['Target.RT']!=0].mean()
        lose_hit_acc=behavior_lose_hit.count()
        lose_miss_RT=behavior_lose_miss['Target.RT'][behavior_lose_miss['Target.RT']!=0].mean()
        lose_miss_acc=behavior_lose_miss.count()
        cont_hit_RT=behavior_cont_hit['Target.RT'][behavior_cont_hit['Target.RT']!=0].mean()
        cont_hit_acc=behavior_cont_hit.count()
        cont_miss_RT=behavior_cont_miss['Target.RT'][behavior_cont_miss['Target.RT']!=0].mean()
        cont_miss_acc=behavior_cont_miss.count()

        #create a dataframe for these variables; Type = trial type (win, lose, control), Outcome = trial outcome (hit, miss), RT = mean reaction time, Acc = trial count for each trial type/outcome
        d={'Type':['Win','Win','Lose','Lose','Cont','Cont'],
           'Outcome':['Hit','Miss','Hit','Miss','Hit','Miss'],
           'RT':[win_hit_RT,win_miss_RT,lose_hit_RT,lose_miss_RT,cont_hit_RT,cont_miss_RT],
           'Acc':[win_hit_acc.Cue,win_miss_acc.Cue,lose_hit_acc.Cue,lose_miss_acc.Cue,cont_hit_acc.Cue,cont_miss_acc.Cue]}
        df=pd.DataFrame(d)

        #write the dataframe above to a csv (in case we want this data at some point)
        df.to_csv(subdir + '/behavior/' + subject + '_behavior_MID.csv',index=False,sep=' ')

        #calculate mean RTs for each trial type (collapsing outcome)
        win_RT=behavior_win['Target.RT'][behavior_win['Target.RT']!=0].mean()
        lose_RT=behavior_lose['Target.RT'][behavior_lose['Target.RT']!=0].mean()
        cont_RT=behavior_cont['Target.RT'][behavior_cont['Target.RT']!=0].mean()

        #calculate average accuracy for each trial type (collapsing outcome): hit count divided by the 26 trials per type
        win_acc=win_hit_acc.Cue/26
        lose_acc=lose_hit_acc.Cue/26
        cont_acc=cont_hit_acc.Cue/26

        #make a new dataframe for the collapsed data
        d_2={'Type':['Win','Lose','Cont'],
             'RT':[win_RT,lose_RT,cont_RT],'Acc':[win_acc,lose_acc,cont_acc]}
        df_2=pd.DataFrame(d_2)

        #write the dataframe above to a csv in the subject's behavior directory (this is the data we want to analyze for now)
        df_2.to_csv(subdir + '/behavior/' + subject + '_behavior_MID_bytrial.csv',index=False,sep=' ')

        #add each subject's collapsed data to one aggregate csv with their subject number
        df_2['Subject']=subject
        df_all.append(df_2)
    df_all=pd.concat(df_all)
    df_all.to_csv(f'{outputdir}/all_behavior_MID_bytrial.csv',index=False,sep=' ')

if __name__=='__main__':
    #example command-line invocation: python MID_beh.py <subjectlist> <subdir_base> <outputdir>
    make_markerfiles_MID(sys.argv[1],sys.argv[2])
    clean_beh_MID(sys.argv[1],sys.argv[2],sys.argv[3])
5 changes: 5 additions & 0 deletions MID_proc/__init__.py
@@ -0,0 +1,5 @@
'''Tools for processing MID MEG and behavioral data!'''

from .MID_beh import make_markerfiles_MID,clean_beh_MID
from .make_paramfiles import make_param
from .make_swarms import make_swarm_newDs,make_swarm_sam
54 changes: 54 additions & 0 deletions MID_proc/make_paramfiles.py
@@ -0,0 +1,54 @@
#CW 5.8.2020
#makes .param files and puts them in each subject's meg folder

import os,sys

#frequency band options and their Hz ranges (also defined as freq_dict inside make_param)
alpha=['alpha','8 14']
beta=['beta','15 29']
gamma=['gamma','30 60']
highgamma=['highgamma','62 118']

#set default variables here:
default_Marker1='respwin'
default_marker1window='0.5 2'
default_freq=highgamma

#setting more variables
XBounds='-10 10'
YBounds='-9 9'
ZBounds='0 15'
ImageStep='.5'
ImageMetric='Power'
Model='Nolte'
CovType='SUM'
ImageFormat='TLRC 5'

def make_param(freq,rootdir='/data/MoodGroup/07M0021_meg_analysis/MID_data/subjects', NumMarkers='1', Marker1='respwin', marker1window='0.5 2'):
    '''Makes a param file for each subject and puts it in their meg folder.'''
    #define the subject list from the directories under rootdir
    root, dirs, files = os.walk(rootdir).__next__()
    sublist=list(dirs)

    #dict of frequency band options and their Hz ranges, so you only need to enter the name string of the band as an argument
    freq_dict={'alpha':'8 14','beta':'15 29','gamma':'30 60','highgamma':'62 118'}
    freqband=freq_dict[freq]
    OrientBand=freqband
    NoiseBand=freqband
    CovBand=freqband
    ImageBand=freqband
    DataSegment=marker1window

    #bestow an appropriate name upon the new param file
    paramfile_name=f'{freq}_{Marker1}.param'

    #make a param file for each subject and drop it in their meg folder
    for sub in sublist:
        new_paramfile=open(f'{rootdir}/{sub}/meg/{paramfile_name}','w+')
        new_paramfile.write(f'NumMarkers {NumMarkers}\nMarker1 {Marker1} {marker1window} TRUE\nOrientBand {OrientBand}\nNoiseBand {NoiseBand}\nCovBand {CovBand}\nImageBand {ImageBand}\nDataSegment {DataSegment}\nXBounds {XBounds}\nYBounds {YBounds}\nZBounds {ZBounds}\nImageStep {ImageStep}\nImageMetric {ImageMetric}\nPrefixLength {str(len(sub))}\nMRIDirectory {rootdir}/{sub}/mri\nModel {Model}\nCovType {CovType}\nImageFormat {ImageFormat}')
        new_paramfile.close()

    print('Param files have been added!')

if __name__=='__main__':
    #example command-line invocation: python make_paramfiles.py <freq>, e.g. highgamma
    make_param(sys.argv[1])
93 changes: 93 additions & 0 deletions MID_proc/make_swarms.py
@@ -0,0 +1,93 @@
#CW 5.8.2020
#makes a swarm file in the swarm folder for copying the original MID dataset into a new dataset; also makes SAM command swarm files (sam_cov, sam_wts, and sam_3d)
#if you run this twice on the same date with the same arguments, it will overwrite the old files (on purpose!)

import os
from datetime import datetime

#set the current date for use in filenames
date_today=datetime.now().strftime('%m%d%Y')


### MAKE NEWDS SWARM FILE
def make_swarm_newDs(subjectlist,newds,marker,timewindow,swarmdir,subdir, origds='_MID-f.ds'):
    '''Makes a swarm file for creating new MEG datasets based on the input parameters.'''
    #create the newDs swarm file and add info about how to run it
    swarmfile_name=f'{swarmdir}/newDs_{date_today}.swarm'
    swarmfile=open(swarmfile_name,'w+')
    swarmcommand=f'Run newDs swarm using this command: swarm -f {swarmfile_name} -g 15 -t auto --module ctf --logdir {swarmdir}/swarm_logs'
    swarmfile.write(f'#{swarmcommand}\n')
    swarmfile.close()

    #read the list of subjects
    sublist=open(subjectlist, 'r')
    proclist=sublist.read().splitlines()
    sublist.close()

    #append a newDs command line to the swarm file for each subject
    for sub in proclist:
        swarmfile=open(swarmfile_name,'a')
        swarmfile.write(f'set -e ; cd {subdir}sub-{sub}/meg ; newDs -marker {marker} -time {timewindow} {sub}{origds} {sub}{newds}\n')
        swarmfile.close()

    #print a message that it is complete, including the command for running the swarm file
    print('Swarm file has been added! \n')
    print(swarmcommand)


### MAKE SAM SWARM FILES
def make_swarm_sam(subjectlist,ds,marker,freqband,swarmdir,subdir):
    '''Makes three swarm files (sam_cov, sam_wts, and sam_3d) for all subjects in the input list.'''
    #read the list of subjects
    sublist=open(subjectlist, 'r')
    proclist=sublist.read().splitlines()
    sublist.close()

    #set the param file name for use in the sam commands
    paramfile_name=f'{freqband}_{marker}.param'

    #create the sam_cov swarm file and add info about how to run it
    swarmfile_name=f'{swarmdir}/sam_cov_{freqband}_{marker}_{date_today}.swarm'
    swarmfile=open(swarmfile_name,'w+')
    swarmcommand_cov=f'Run sam_cov swarm using this command: swarm -f {swarmfile_name} -g 15 -t auto --module samsrcv3 --logdir {swarmdir}/swarm_logs'
    swarmfile.write('#' + swarmcommand_cov + '\n')
    swarmfile.close()

    #append a sam_cov command line to the swarm file for each subject
    for sub in proclist:
        swarmfile=open(swarmfile_name,'a')
        swarmfile.write(f'set -e ; cd {subdir}sub-{sub}/meg ; sam_cov -r {sub}{ds} -m {paramfile_name} -v \n')
        swarmfile.close()

    #now make a swarm file for sam_wts
    swarmfile_name=f'{swarmdir}/sam_wts_{freqband}_{marker}_{date_today}.swarm'
    swarmfile=open(swarmfile_name,'w+')
    swarmcommand_wts=f'Run sam_wts swarm using this command: swarm -f {swarmfile_name} -g 15 -t auto --module samsrcv3 --logdir {swarmdir}/swarm_logs'
    swarmfile.write(f'#{swarmcommand_wts}\n')
    swarmfile.close()
    for sub in proclist:
        swarmfile=open(swarmfile_name,'a')
        swarmfile.write(f'set -e ; cd {subdir}sub-{sub}/meg ; sam_wts -r {sub}{ds} -m {paramfile_name} -v --MRIPattern %M/%s -H hull.shape \n')
        swarmfile.close()

    #and finally make the sam_3d swarm file
    swarmfile_name=f'{swarmdir}/sam_3d_{freqband}_{marker}_{date_today}.swarm'
    swarmfile=open(swarmfile_name,'w+')
    swarmcommand_3d=f'Run sam_3d swarm using this command: swarm -f {swarmfile_name} -g 15 -t auto --module samsrcv3 --logdir {swarmdir}/swarm_logs'
    swarmfile.write(f'#{swarmcommand_3d}\n')
    swarmfile.close()
    for sub in proclist:
        swarmfile=open(swarmfile_name,'a')
        swarmfile.write(f'set -e ; cd {subdir}sub-{sub}/meg ; sam_3d -r {sub}{ds} -m {paramfile_name} -v \n')
        swarmfile.close()

    #print a message that it is complete, including the commands for running each swarm file
    print('Swarm files have been added! \n')
    print(f'1.) {swarmcommand_cov}\n\n2.) {swarmcommand_wts}\n\n3.) {swarmcommand_3d}')


if __name__=='__main__':
    make_swarm_newDs()
    make_swarm_sam()
56 changes: 56 additions & 0 deletions README.md
100644 → 100755
@@ -1,3 +1,59 @@
# project_spring_2020

[![CircleCI](https://circleci.com/gh/biof309/project_spring_2020/tree/master.svg?style=shield)](https://circleci.com/gh/biof309/project_spring_2020/tree/master)

## MID_proc
A very amateur python package by Christina Wusinich

Last updated: 04/08/2020

## Objective of package:
A series of scripts for processing behavioral and MEG data from a reward processing task, called the Monetary Incentive Delay (MID) task.

## Background:
The MID task presents cue shapes that allow participants to win money, avoid losing money, or neither (a neutral condition) if they press a button within a particular time window after the cue. After pressing the button, participants receive feedback indicating whether they won money, avoided losing money, or had no reward in either direction.

The goal of my initial analysis was to report on deep source activity (i.e. the striatum in this case) in the task’s MEG data using synthetic aperture magnetometry (SAM) for source localization. Showing that this is possible in MEG (not usually known for deep source analyses) will pave the way for studying reward processing in mood disorders and the impact of novel treatments using MEG. Moving forward, we plan to conduct (and have already begun conducting) analyses in different time windows of the task and in different frequency bands, so the scripts below were designed to make it easy to repeat processing steps with different parameters.

For the behavioral data, we need mean reaction times and button-press accuracy for analysis, and this package gathers both.

## Files that need to be processed:
- Behavior data: a .txt output from the MID task that indicates button presses, timing of stimuli, etc.
- MEG data: a CTF (NIH's MEG system) file of MEG data from the MID task
- MRI data: structural MRIs for use with source localization (already converted to .nii before this)

## Setup of directories for use with this package:
- Behavior data example: MID_data/subjects/sub-201/behavior
- MEG data example: MID_data/subjects/sub-201/meg
- MRI data example: MID_data/subjects/sub-201/mri
- swarm files: MID_data/scripts/swarm


## Processing steps (that this package helps with):

## Behavior processing
1. **MID_beh.py**
- make_markerfiles_MID(subjectlist,subdir_base): Makes marker files for further MEG processing; these markers designate win/loss/neutral cues to be marked in the MEG file and are output as three separate .txt files to the subject’s MEG directory
- clean_beh_MID(subjectlist,subdir_base,outputdir): Pulls columns from behavior data file and calculates mean RTs and accuracy by trial and subject and appends that to master behavior data sheet
2. Input: cue markers text file (this is from a previous MEG processing step not included here), behavior text file
3. Output: win/loss/neutral text files (3), two cleaned behavior csvs for each participant (deposited in their respective behavior folders), and master csv that includes mean reaction times and accuracy by participant and trial
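
As a sketch of the RT/accuracy logic these functions apply (the toy table and values below are illustrative, not real task data):

```python
import pandas as pd

# toy behavior table with the three columns clean_beh_MID pulls out
behavior = pd.DataFrame({
    'Cue':          ['Win2', 'Win2', 'Lose2', 'Control'],
    'ResponseType': ['GoodResponse', 'NoResponse', 'GoodResponse', 'GoodResponse'],
    'Target.RT':    [250, 0, 310, 290],
})

# win-trial hits, then mean RT over non-zero RTs (a zero RT means no button press)
wins = behavior[behavior.Cue == 'Win2']
win_hits = wins[wins.ResponseType == 'GoodResponse']
win_hit_RT = win_hits['Target.RT'][win_hits['Target.RT'] != 0].mean()
print(win_hit_RT)  # 250.0
```

The same filter-then-mean pattern is repeated for the lose and control trial types, and the hit counts become the accuracy numerator.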

## Pre-SAM parameter file creation
1. **make_paramfiles.py**:
- make_param(freq,rootdir='/data/MoodGroup/07M0021_meg_analysis/MID_data/subjects', NumMarkers='1', Marker1='respwin', marker1window='0.5 2'): Makes parameter files for use with SAM commands (see step below) and drops each param file into each subject's meg directory
2. Input: function input parameters; it also needs to find subject folders in your "subjects" directory
3. Output: a param file in each subject's meg directory
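
The param-file naming and header format can be sketched like this (a standalone snippet mirroring make_param's f-string; it writes to a temp directory rather than a subject's meg folder, and only the first three lines of the file are shown):

```python
import os
import tempfile

# frequency band lookup, as in make_param
freq_dict = {'alpha': '8 14', 'beta': '15 29', 'gamma': '30 60', 'highgamma': '62 118'}
freq, Marker1, marker1window = 'highgamma', 'respwin', '0.5 2'
band = freq_dict[freq]

# first lines of the param file that make_param writes
header = f'NumMarkers 1\nMarker1 {Marker1} {marker1window} TRUE\nOrientBand {band}\n'

tmpdir = tempfile.mkdtemp()
path = os.path.join(tmpdir, f'{freq}_{Marker1}.param')  # e.g. highgamma_respwin.param
with open(path, 'w') as f:
    f.write(header)

print(os.path.basename(path))  # highgamma_respwin.param
```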

## Create swarm files for MEG data processing
1. **make_swarms.py**:
- make_swarm_newDs(subjectlist,newds,marker,timewindow,swarmdir,subdir,origds='_MID-f.ds'): Makes a swarm file that will create new datasets from existing MEG datasets for all subjects in list (helpful if you want to look in a new time window or use different markers and make a fresh batch of datasets to work with)
- make_swarm_sam(subjectlist,ds,marker,freqband,swarmdir,subdir): Makes three swarm files for all subjects in list; each swarm file has a command for source localization in the chosen frequency band using SAM (from samsrcv3)
2. Note: origds defaults to the original filtered MID MEG data file
3. Input: original MEG file (something.ds) and processinglist.txt (or some file with a list of participant ID numbers you want to include--one day this will be fancier, but this is what we're working with this week), MRI with fiducial markers set, parameter file (e.g. highgamma_respwin.param)
4. Output: .swarm files in your swarm directory; also after running this, you will see the swarm command(s) you need to run as output in your terminal window
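
The per-subject lines these swarm files contain can be sketched as follows (the subject ID and paths below are illustrative, not real data):

```python
from datetime import datetime

# current date for the swarm file name, as in make_swarms.py
date_today = datetime.now().strftime('%m%d%Y')
swarmdir, subdir = 'MID_data/scripts/swarm', 'MID_data/subjects/'  # illustrative paths
sub, newds, marker, timewindow = '201', '_MID_resp.ds', 'respwin', '0.5 2'

# swarm file name and one newDs command line per subject, as in make_swarm_newDs
swarmfile_name = f'{swarmdir}/newDs_{date_today}.swarm'
line = (f'set -e ; cd {subdir}sub-{sub}/meg ; '
        f'newDs -marker {marker} -time {timewindow} {sub}_MID-f.ds {sub}{newds}')
print(line)
```

Each sam_cov/sam_wts/sam_3d swarm file is built the same way, substituting the corresponding SAM command for `newDs`.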


## Notes about this young and naive package
- Obviously this package does not cover all of the MEG data processing steps involved for this task, but it has streamlined the process substantially and will hopefully one day grow up to be a mature MEG processing pipeline.
- The SAM swarm script successfully creates the swarm files, and the swarm files run successfully (according to the output), but the files that are created are bad (as in not the same as if each SAM command was run individually and not in a swarm).
- Future directions include solving the problem above, and making my scripts more useful and concise (any suggestions toward that latter goal would be appreciated!)
1 change: 0 additions & 1 deletion project_spring_2020/sample_file.py

This file was deleted.

20 changes: 20 additions & 0 deletions setup.py
@@ -0,0 +1,20 @@
from setuptools import setup, find_packages

with open('README.md','r') as yeet:
    long_description=yeet.read()

setup(
    name='MID_proc',
    author='Christina Wusinich',
    version='0.2.0',
    author_email='christinawusinich@gmail.com',
    description='Some tools for processing MID behavioral and MEG data',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/cwusinich/project_spring_2020',
    license='Apache',
    packages=find_packages(),
    python_requires='>=3.6'
)

