Skip to content

Commit

Permalink
Merge pull request #35 from sbesson/idr0097_updates
Browse files Browse the repository at this point in the history
Idr0097 updates
  • Loading branch information
sbesson committed Feb 11, 2021
2 parents 785e372 + b8f62df commit 334968d
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 29 deletions.
62 changes: 40 additions & 22 deletions pyidr/study_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,19 +114,32 @@ def __init__(self, study_file):
self.study.update(self.parse_data_doi(self.study, "Study Data DOI"))

self.components = []
for t in TYPES:
n = int(self.study.get('Study %ss Number' % t, 0))
for i in range(n):
self.log.debug("Parsing %s %g" % (t, i + 1))

d = self.parse(t, lines=self.get_lines(i + 1, t))
d.update({'Type': t})
d.update(self.study)
doi = self.parse_data_doi(d, "%s Data DOI" % t)
if doi:
d.update(doi)
self.parse_annotation_file(d)
self.components.append(d)

# Find number of screens and experiments
n_screens = int(self.study.get('Study Screens Number', 0))
n_experiments = int(self.study.get('Study Experiments Number', 0))
self.log.debug("Expecting %s screen(s) and %s experiment(s)" %
(n_screens, n_experiments))
if n_screens > 0 and n_experiments > 0:
component_regexp = '(Screen|Experiment)'
elif n_screens > 0:
component_regexp = '(Screen)'
elif n_experiments > 0:
component_regexp = '(Experiment)'
else:
raise Exception("Not enough screens and/or experiments")

# Find all study components in order
for i in range(n_screens + n_experiments):
lines, component_type = self.get_lines(i + 1, component_regexp)
d = self.parse(component_type, lines=lines)
d.update({'Type': component_type})
d.update(self.study)
doi = self.parse_data_doi(d, "%s Data DOI" % component_type)
if doi:
d.update(doi)
self.parse_annotation_file(d)
self.components.append(d)

if not self.components:
raise Exception("Need to define at least one screen or experiment")
Expand Down Expand Up @@ -161,23 +174,28 @@ def parse(self, scope, lines=None):
d[key] = value
return d

def get_lines(self, index, component_regexp):
    """Return the study-file lines for the component numbered *index*.

    Scans ``self._study_lines`` for a header line matching
    ``"<component> Number\\t<digits>"`` where ``<component>`` is matched by
    *component_regexp* (e.g. ``"(Screen|Experiment)"``). Collects that header
    line and every following line up to (but excluding) the next component
    header, marking each collected line as used in
    ``self._study_lines_used``.

    :param index: 1-based component number to extract.
    :param component_regexp: regex fragment with exactly ONE capturing group
        naming the component type — group 1 of the compiled pattern is the
        type, group 2 the component number.  (assumes exactly one group;
        a second group in the caller's regexp would shift group numbering)
    :return: ``(lines, component_type)`` where *component_type* is the
        matched type string (e.g. ``"Screen"``).
    :raises Exception: if *index* appears twice, or is never found.
    """
    self.log.debug("Parsing %s %g" % (component_regexp, index))
    # Group 1 comes from component_regexp, group 2 is the component number.
    PATTERN = re.compile(r"^%s Number\t(\d+)" % component_regexp)
    found = False
    lines = []
    component_type = None
    for idx, line in enumerate(self._study_lines):
        m = PATTERN.match(line)
        if m:
            if int(m.group(2)) == index and found:
                # Same component number declared twice in the study file.
                raise Exception("Duplicate component %g" % index)
            elif int(m.group(2)) == index and not found:
                # Start of the requested component; remember its type.
                found = True
                component_type = m.group(1)
            elif int(m.group(2)) != index and found:
                # Next component header reached: requested block is complete.
                return lines, component_type
        if found:
            # Record provenance so unparsed lines can be reported later.
            self._study_lines_used[idx].append(("get_lines", index))
            lines.append(line)
    if not lines:
        raise Exception("Could not find %s %g" % (component_regexp, index))
    # Component ran to end-of-file without a following header.
    return lines, component_type

def parse_annotation_file(self, component):
import glob
Expand All @@ -195,11 +213,11 @@ def parse_annotation_file(self, component):

# Generate GitHub annotation URL
if os.path.exists(os.path.join(self._dir, ".git")):
base_gh_url = "https://github.com/IDR/%s/blob/master/%s" % (
base_gh_url = "https://github.com/IDR/%s/blob/HEAD/%s" % (
m.group(1), m.group(3))
else:
base_gh_url = (
"https://github.com/IDR/idr-metadata/blob/master/%s" % name)
"https://github.com/IDR/idr-metadata/blob/HEAD/%s" % name)

# Try to find single annotation file in root directory
for extension in ['.csv', '.csv.gz']:
Expand Down
13 changes: 6 additions & 7 deletions scripts/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,16 +103,15 @@ def studies(study_list, default_columns=["name", "path"]):
continue

logging.info("Finding containers for study %s" % study)
target = "Plate"
containers = glob(join(study, "screen[A-Z]"))
if containers:
assert not glob(join(study, "experiment*")), study
else:
target = "Dataset"
containers = glob(join(study, "experiment[A-Z]"))
containers = glob(
join(study, "screen[A-Z]")) + glob(join(study, "experiment[A-Z]"))

assert len(containers) >= 1
for container in sorted(containers):
if container.startswith(join(study, "screen")):
target = "Plate"
else:
target = "Dataset"
bulks = glob(join(container, "*-bulk.yml"))
bulks += glob(join(container, "**/*-bulk.yml"))
for bulk in bulks:
Expand Down

0 comments on commit 334968d

Please sign in to comment.