/*
 * autopick.cpp - automatic picker routines and infrastructure
 *
 * This file is part of Automaton.
 *
 * Copyright (C) 2002, 2003
 * Paul Gettings, Dep't of Geology & Geophysics
 * University of Utah
 *
 * This file is released under the terms of the software
 * license in the file "LICENSE" in the root directory of
 * this package.  If this file is missing or corrupt, please
 * contact the author to receive a new copy.
 *
 * Automaton is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  Use at your
 * own risk; your mileage may vary.
 *
 * Suggestions, improvements, and bug reports welcome at
 * <gettings@mines.utah.edu>
 */
/* First arrival time autopicker
 *
 * Use a fuzzy membership function to decide if a result
 * is a valid pick.  Membership function based on:
 *   amplitude of fit to empirical wavelet
 *   time offset from predicted first arrival
 *   correlation coefficient of best fit to wavelet
 *
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>

#include "autopick.h"
#include "getrecord.h"

/* Functions
 */
/* Automatically pick first arrivals on every trace of one shot.
 *
 * data		seismic traces, in seismicData structure
 * parms	parameter structure, filled and checked
 * known	known good pick, in knownPick structure.  Passed by value:
 *		the wavelet computed from it lives only in this local copy
 *		and is released again before returning.
 * out		file opened for write, where good picks will be output
 *		file position is not rewound, and file is not closed
 * dbg		file opened for write; gets one diagnostic line per trace.
 *		NOTE: dbg is closed here on the two early error returns
 *		(wavelet cannot be computed / known pick cannot be fit),
 *		and left open otherwise.
 *
 * Returns 1 once all traces were processed, 0 on the early errors.
 */
int autopick(seismicData data, autopickParameters parms, knownPick known, FILE *out, FILE *dbg)
{
  double d, t0, t1, a0, a1, t, a, r, m;
  double w;
  double start, end;
  double dx, dy;
  long i;
  long shot = (long)data.shot; // printed with %ld whatever the field's integer type

  /* compute the first arrival wavelet */
  if(!computeWavelet(&known)) {
    fprintf(stderr, "autopick: error computing wavelet from known pick. Abort.\n");
    fclose(dbg);
    return(0);
    }

  // compute initial amplitude, time offset, etc. using known pick
  if(!fit_wavelet(known.t, known.w, known.start, known.start+known.length, &a0, &t0, &r)) {
    fprintf(stderr, "autopick: cannot fit known wavelet for amplitude, etc. Abort\n");
    killWavelet(&known.w); // wavelet was allocated above; do not leak it
    fclose(dbg);
    return(0);
    }
  dx = known.sx - known.x;
  dy = known.sy - known.y;
  d = sqrt(sqr(dx) + sqr(dy));

  // account for offset between source and known pick:
  // back-project time and amplitude of the known pick to the source
  t0 -= (1000*d/parms.Velocity);
  a0 *= parms.Attenuation*d*d;

  /* start with the first trace */
  for(i=0; i<data.n; i++) {
    /*   compute expected time offset and amplitude */
    // offset between trace and source point
    dx = known.sx - data.x[i];
    dy = known.sy - data.y[i];
    d = sqrt(sqr(dx) + sqr(dy));
    if(d == 0) { // we are fitting source point
      a1 = a0*10; // fudge factor to keep amplitude finite, but make it big
      }
    else {
      // expected amplitude: a0 scaled by 1/(Attenuation*d^2)
      a1 = a0/(parms.Attenuation*d*d);
      }
    // use the best guess velocity for the predicted time offset
    t1 = t0 + (1000*d/parms.Velocity); // convert to ms

    // compute start, end of the search window from the velocity bounds
    start = t0 - known.length + 1000*d/parms.MaxVelocity; // convert to ms
    end = t0 + known.length + 1000*d/parms.MinVelocity; // convert to ms

    /*   find best amplitude, time offset for wavelet */
    if(!fit_wavelet(data.data[i], known.w, start, end, &a, &t, &r)) {
      fprintf(stderr, "autopick: Trace %ld - no good pick; wavelet not fit!\n", i+1);
      fprintf(dbg, "%8ld %8ld                                              %15.3lf                 %15.3lf                \n",
              shot, data.num[i], a1, t1);
      continue;
      }

    // the amplitude decay constant is relative: rescale it by the expected
    // amplitude for this trace only, then restore the configured value
    w = parms.AmpConst;
    parms.AmpConst *= a1;

    /*   compute membership function for best fit */
    m = M(r, a-a1, t-t1, parms);

    parms.AmpConst = w; // reset for next trace

    /*   if membership > threshhold, write pick output */
    if(m > parms.Threshhold) {
      fprintf(out, "%8ld %8ld %20.3lf\n", shot, data.num[i], t);
      fprintf(dbg, "%8ld %8ld %15.3lf %12lf %15.3lf %15.3lf %15.3lf %15.3lf %15.3lf\n", shot, data.num[i], t, m, a, a1, t, t1, r);
      }
    /*   else flag error */
    else {
      fprintf(stderr, "No good pick: Shot %6ld Trace %6ld [M=%6lf]\n", shot, data.num[i], m);
      fprintf(dbg, "%8ld %8ld                 %12lf %15.3lf %15.3lf %15.3lf %15.3lf %15.3lf\n", shot, data.num[i], m, a, a1, t, t1, r);
      }
    }

  // the wavelet belongs to our private copy of the known pick; release it
  killWavelet(&known.w);
  return(1);
}


/* Constructors for seismic data, wavelet, and trace types
 *
 * Only allocates memory!  If memory allocation fails, then
 * returns 0 as the length.
 */
/* Allocates memory for new seismicData instance
 * Does not create the traces, only a pointer to memory
 * for np trace instances.  Create each trace when loading
 * data.
 *
 * np		number of traces to reserve room for
 *
 * Returns the new structure.  On allocation failure the length n is 0
 * and every array pointer is NULL, so the result can be inspected (or
 * freed) safely.  The trace slots are zero-filled by calloc, so slots
 * that have not been loaded yet do not hold garbage pointers.
 */
seismicData newSeismic(unsigned long np)
{
  seismicData s;

  // start from a fully defined "empty" structure
  s.n = 0;
  s.shot = 0;
  s.data = NULL;
  s.x = s.y = NULL;
  s.num = NULL;

  // calloc also rejects np*size products that would overflow
  s.data = (trace *)calloc(np, sizeof(trace));
  if(s.data == NULL) {
    return(s);
    }
  s.x = (float *)calloc(np, sizeof(float));
  s.y = (float *)calloc(np, sizeof(float));
  s.num = (long *)calloc(np, sizeof(long));
  if(s.x == NULL || s.y == NULL || s.num == NULL) {
    // cleanup alloc'd mem; free(NULL) is a no-op
    free(s.num); s.num = NULL;
    free(s.y); s.y = NULL;
    free(s.x); s.x = NULL;
    free(s.data); s.data = NULL;
    return(s);
    }
  s.n = np;
  return(s);
}

/* Allocates memory for a wavelet of np samples (time and amplitude).
 *
 * Returns the new wavelet; on allocation failure n is 0 and both
 * array pointers are NULL.  Sample values are not initialized.
 */
wavelet newWavelet(unsigned long np)
{
  wavelet w;

  w.n = 0;
  w.time = w.amp = NULL; // defined even on the early return paths
  if((w.time = (float *)malloc(sizeof(float)*np)) == NULL) {
    return(w);
    }
  if((w.amp = (float *)malloc(sizeof(float)*np)) == NULL) {
    free(w.time); w.time=NULL; // cleanup allocated mem
    return(w);
    }
  w.n = np;
  return(w);
}

/* Allocates memory for a trace of np samples (time and amplitude).
 *
 * Returns the new trace; if either allocation fails, n is 0 and
 * both array pointers are NULL.  Sample values are not initialized.
 */
trace newTrace(unsigned long np)
{
  trace fresh;
  float *times;
  float *amps;

  fresh.n = 0;
  fresh.time = NULL;
  fresh.amp = NULL;

  times = (float *)malloc(sizeof(float)*np);
  if(times == NULL) {
    return(fresh);
    }

  amps = (float *)malloc(sizeof(float)*np);
  if(amps == NULL) {
    free(times); // give back the half that did succeed
    return(fresh);
    }

  fresh.time = times;
  fresh.amp = amps;
  fresh.n = np;
  return(fresh);
}

/* Releases the sample arrays of a trace and marks it empty.
 * The trace structure itself is not freed.  A NULL pointer is ignored.
 */
void killTrace(trace *t)
{
  if(t == NULL) return;
  free(t->time);
  free(t->amp);
  t->time = t->amp = NULL;
  t->n = 0; // length must agree with the (now absent) arrays
}

/* Releases everything owned by a seismicData instance: the sample
 * arrays of each of its n traces, the trace array, and the geophone
 * number/coordinate arrays.  The structure is left empty (n == 0,
 * all pointers NULL).  A NULL pointer is ignored.
 *
 * Assumes all n trace slots were filled (as readSeismicData does) or
 * hold NULL pointers.
 */
void killSeismic(seismicData *s)
{
  unsigned long i;

  if(s == NULL) return;

  if(s->data != NULL) {
    // every trace owns its own buffers, not just the first one
    for(i=0; i<s->n; i++) {
      killTrace(&s->data[i]);
      }
    free(s->data);
    }
  free(s->num);
  free(s->x);
  free(s->y);
  s->num = NULL;
  s->x = s->y = NULL;
  s->data = NULL;
  s->n = 0;
}

/* Releases the sample arrays of a wavelet and marks it empty.
 * The wavelet structure itself is not freed.
 */
void killWavelet(wavelet *w)
{
  free(w->amp);
  w->amp = NULL;

  free(w->time);
  w->time = NULL;

  w->n = 0;
}


/* compute wavelet amplitude at time offset t, using wavelet W
 * if t < 0 (before wavelet start), returns first value of wavelet
 * if t longer than wavelet, returns last value of wavelet
 * returns linear interpolation between wavelet times for t in wavelet
 * an empty wavelet (n == 0) has no samples to return; yields 0
 */
double wave(wavelet W, double t)
{
  unsigned long i;

  if(W.n == 0) return(0.0); // nothing to index; also keeps n-1 from wrapping
  if(t < 0) return(W.amp[0]);
  // look for the sample interval [time[i], time[i+1]) holding t
  for(i=0; i+1<W.n; i++) {
    if(t >= W.time[i] && t < W.time[i+1]) {
      return((t-W.time[i])*(W.amp[i+1]-W.amp[i])/(W.time[i+1] - W.time[i]) + W.amp[i]);
      }
    }
  // t not in wavelet, so return last wavelet value
  return(W.amp[W.n-1]);
}


/* least-squares fit for wavelet to trace
 *
 * fitting amplitude and find best time offset
 * use linear regression fit, since amplitude is linear:
 *   T = A * W
 * where A is the scalar amplitude, T is the trace data, and W is the wavelet
 *
 * The wavelet is slid along the trace one sample at a time over the
 * window [start, end]; the position with the largest (signed)
 * correlation coefficient wins.  Positions where the wavelet would run
 * past the end of the trace, where either the wavelet or the trace
 * segment has no variance, or where the fitted amplitude is exactly 0,
 * are not candidates.
 *
 * T		trace data, of type trace
 * W		wavelet, of type wavelet
 * start	beginning of fit window, in ms, global time
 * end		end of fit window, in ms, global time
 * amp		return value: amplitude
 * dt		return value: time offset
 * r		return value: correlation coefficient
 *
 * Returns 1 if a fit was found, 0 otherwise; in the latter case the
 * return values are set to amp=0, dt=-1, r=-2.
 */
int fit_wavelet(trace T, wavelet W, double start, double end, double *amp, double *dt, double *r)
{
  unsigned long i, k;
  double stw, st, sw, sw2, st2; // accumulators for linear regression
  double num, varw, vart; // regression numerator and (scaled) variances
  double a, p, tb, ab, pb; // amplitude, correlation vars
  double y, t; // temp vars
  int found = 0;

  // "no fit" values, reported unless a candidate is accepted below
  ab = 0; tb = -1;
  pb = -2;
  *amp = ab;
  *dt = tb;
  *r = pb;

  // move to start of window
  k = 0;
  while((k < T.n) && (T.time[k] < start)) k++;
  if(k>=T.n) return(0); // no trace data in window!

  // W.n <= T.n-k: the whole wavelet must lie inside the trace, otherwise
  // the sums below would read past the end of the trace arrays
  for(; (k < T.n) && (W.n <= T.n - k) && (T.time[k] <= end); k++) {
    // fit the wavelet starting at index k
    /* algorithm note:
     * want to find amplitude to best fit T=a*W
     * for the various points of T and W: T_i = a*W_i
     * This is a linear regression problem!
     */
    stw = st = sw = sw2 = st2 = 0;
    for(i=0; i<W.n; i++) {
      t = T.time[i+k]-T.time[k]; // time offset
      y = wave(W, t); // wavelet value at time offset
      stw += T.amp[i+k]*y;
      st  += T.amp[i+k];
      sw  += y;
      sw2 += y*y;
      st2 += T.amp[i+k]*T.amp[i+k];
      }
    num  = W.n*stw - (sw*st);
    varw = W.n*sw2 - sw*sw;
    vart = W.n*st2 - st*st;
    // a flat wavelet or flat trace segment gives 0/0 or x/0; such a
    // NaN/inf result must never become (and then stay) the best fit
    if(!(varw > 0) || !(vart > 0)) continue;

    a = num / varw;
    p = num / ( sqrt(varw) * sqrt(vart) );
    if(a == 0) continue; // amplitude 0: no wave, so not the correct pick

    // first candidate, or correlation better than current best
    if(!found || p > pb) {
      ab = a;
      tb = T.time[k];
      pb = p;
      found = 1;
      }
    }
  *amp = ab;
  *dt = tb;
  *r = pb;
  return(found);
}


/* Membership function
 *
 * Degree of membership of a fit in the set {"good pick"}: a weighted
 * average of three scores,
 *   m = (W1*|r| + W2*exp(-|da|/Ca) + W3*exp(-|dt|/Ct))/(W1+W2+W3)
 * where
 *   r is the correlation coefficient of the wavelet fit
 *   da is the difference between expected and fit amplitude
 *   dt is the difference between expected and fit time offset
 *   W1, W2, W3 are the weights p.Mw[0..2]
 *   Ca, Ct are the decay constants p.AmpConst and p.TimeConst
 *
 * For positive weights and decay constants the result lies in [0,1],
 * where 0==>not in set, 1==>in set.
 */
double M(double r, double da, double dt, autopickParameters p)
{
  double corrScore, ampScore, timeScore;
  double total;

  corrScore = p.Mw[0] * fabs(r);                      // correlation coefficient
  ampScore  = p.Mw[1] / exp(fabs(da)/p.AmpConst);     // amplitude function
  timeScore = p.Mw[2] / exp(fabs(dt)/p.TimeConst);    // time function

  total = corrScore;
  total += ampScore;
  total += timeScore;

  total /= p.Mw[0]+p.Mw[1]+p.Mw[2]; // normalize by the weight sum

  return(total);
}

/* Compute empirical wavelet from a known pick on a data trace
 *
 * The samples of k->t with time in [k->start, k->start+k->length) are
 * copied into a newly allocated k->w: times are made relative to the
 * first sample of the window, amplitudes are divided by the largest
 * absolute amplitude in the window (scaled to [-1,1]).
 *
 * Returns 1 on success.  Returns 0 if the trace ends before the window
 * starts or ends, if the window holds no samples, if all samples in the
 * window are zero, or if the wavelet cannot be allocated.
 */
int computeWavelet(knownPick *k)
{
  unsigned long i, first, last;
  double max, stop;

  stop = k->start + k->length; // end of the pick window

  // find first sample of the window (bound is tested before the read)
  first = 0;
  while(first < k->t.n && k->t.time[first] < k->start) first++;
  if(first >= k->t.n) return(0); // trace ends before start time!

  // find end of window and the range of amplitudes inside it
  max = fabs(k->t.amp[first]);
  last = first;
  while(last < k->t.n && k->t.time[last] < stop) {
    if(fabs(k->t.amp[last]) > max) max = fabs(k->t.amp[last]);
    last++;
    }
  if(last >= k->t.n) return(0); // trace ends before end time!

  if(last == first) {
    fprintf(stderr, "computeWavelet: no samples inside the pick window!\n");
    return(0);
    }
  if(max == 0) { // scaling below would divide by zero
    fprintf(stderr, "computeWavelet: trace is flat inside the pick window!\n");
    return(0);
    }

  k->w = newWavelet(last-first); // allocate wavelet long enough
  if(k->w.n == 0) {
    fprintf(stderr, "computeWavelet: cannot allocate new wavelet!\n");
    return(0);
    }
  for(i=first; i<last; i++) {
    k->w.time[i-first] = k->t.time[i]-k->t.time[first];
    k->w.amp[i-first] = k->t.amp[i]/max; // scale to [-1,1]
    }
  return(1);
}

/* Read seismic data from file into structure.
 *
 * For a really large data set, this might be too expensive and we will
 * need to window data or only load on demand.  Can also just throw
 * more RAM at the problem....
 *
 * This reads data from an ASCII format, NOT SEGY/SU format!
 * Either convert SEGY to ASCII or use the segy.h routines!
 *
 * The ASCII format only allows for one shot per file!
 *
 * Data format specified in "README.data_formats"
 *
 * As read here: a header record (skipped), a record holding the shot
 * number, then for each geophone a record "G num x y npoints" followed
 * by npoints records "time amplitude".
 *
 * name		file to read
 *
 * Returns the filled structure.  Any failure (open, allocation, short
 * or malformed file) prints a message to stderr and exits.
 */
seismicData readSeismicData(char *name)
{
  seismicData s;
  unsigned long i, j, np;
  long shot;
  FILE *fp;
  char record[1024];

  if((fp = fopen(name, "rb")) == NULL) {
    fprintf(stderr, "readSeismicData: cannot open %s for read. Abort.\n", name);
    exit(1);
    }

  // first pass: count the geophone records = number of traces
  np = 0;
  while(nextRecord(fp, record)) {
    if(record[0] == 'G') {
      np++;
      }
    }

  // allocate data struct
  s = newSeismic(np);
  if(s.n == 0) {
    fprintf(stderr, "readSeismicData: cannot allocate memory for seismicData struct.\n");
    exit(1);
    }

  // rewind the file
  rewind(fp);
  // skip header record
  if(!nextRecord(fp, record)) {
    if(feof(fp)) {
      fprintf(stderr, "readSeismicData: unexpected EOF while reading header record.\n");
      exit(1);
      }
    else {
      fprintf(stderr, "readSeismicData: error reading header record.\n");
      exit(1);
      }
    }

  // read shot number
  if(!nextRecord(fp, record)) {
    if(feof(fp)) {
      fprintf(stderr, "readSeismicData: unexpected EOF while reading shot number.\n");
      exit(1);
      }
    else {
      fprintf(stderr, "readSeismicData: error reading shot number.\n");
      exit(1);
      }
    }
  // parse into a long and assign, so the conversion always matches
  if(sscanf(record, "%ld", &shot) != 1) {
    fprintf(stderr, "readSeismicData: error reading shot number.\n");
    exit(1);
    }
  s.shot = shot;

  // read each trace and fill into struct
  for(i=0; i<s.n; i++) {
    if(!nextRecord(fp, record)) {
      fprintf(stderr, "readSeismicData: unexpected EOF before end of data in file %s: looking for geophone record %lu\n", name, i);
      exit(1);
      }
    // geophone record header: number, x, y, # of points in the trace.
    // Anything else here means the file is out of step; carrying on
    // would size the trace from a stale point count.
    if(record[0] != 'G' ||
       sscanf(record, "G %ld %f %f %lu", &s.num[i], &s.x[i], &s.y[i], &np) != 4) {
      fprintf(stderr, "readSeismicData: malformed geophone record %lu in file %s\n", i+1, name);
      exit(1);
      }

    // np = # of points in the trace
    s.data[i] = newTrace(np);
    if(s.data[i].n == 0) {
      fprintf(stderr, "readSeismicData: cannot allocate memory for trace %lu.\n", i+1);
      exit(1);
      }
    // read numbers into s.data[i]
    for(j=0; j<s.data[i].n; j++) {
      if(!nextRecord(fp, record)) {
        fprintf(stderr, "readSeismicData: EOF before end of data: reading data for trace %ld, record %lu\n", s.num[i], j+1);
        exit(1);
        }
      if(sscanf(record, "%f %f", &s.data[i].time[j], &s.data[i].amp[j]) != 2) {
        fprintf(stderr, "readSeismicData: malformed data record: trace %ld, record %lu\n", s.num[i], j+1);
        exit(1);
        }
      }
    }

  fclose(fp);
  return(s);
}


/* Read pick start time from file, and fill the knownPick structure from
 * seismic data and algorithm parameters.
 *
 * Assume we have translated the data into our very own format, since
 * then we can just use a converter in a pipeline or local temp space
 *
 * Data format specified in "README.data_formats"
 *
 * As read here, each record is "shot geophone start sx sy"; the first
 * complete record whose shot equals data.shot is used.
 *
 * name		known pick file
 * data		seismic data of the shot; supplies the trace and the
 *		geophone position of the known pick.  The trace is copied
 *		shallowly, so its sample arrays stay owned by data.
 * p		algorithm parameters; WaveTime becomes the pick length
 *
 * Returns the filled structure, with an empty wavelet (n == 0).  Prints
 * a message and exits if the file cannot be opened, has no pick for
 * the shot, or names a geophone that is not in data.
 */
knownPick readKnownPick(char *name, seismicData data, autopickParameters p)
{
  knownPick k;
  long i, j, l, shot;
  float start, sx, sy;
  char record[1024];
  FILE *fp;
  int found = 0;

  if((fp = fopen(name, "rb")) == NULL) {
    fprintf(stderr, "readKnownPick: cannot open %s for read.\n", name);
    exit(1);
    }

  // determine pick start time from file
  l = 0;
  shot = 0;
  start = sx = sy = 0;
  while(nextRecord(fp, record)) { // fails at end of input
    // only a fully parsed record may match; a short one would leave
    // some of these values unset or stale
    if(sscanf(record, "%ld %ld %f %f %f", &shot, &l, &start, &sx, &sy) != 5) {
      continue;
      }
    if(shot == data.shot) {
      found = 1;
      break;
      }
    }
  fclose(fp);
  if(!found) {
    fprintf(stderr, "readKnownPick: unexpected EOF reading known pick file;");
    fprintf(stderr, " no pick for shot %ld?\n", (long)data.shot);
    exit(1);
    }
  k.start = start;
  k.sx = sx;
  k.sy = sy;

  // which trace in the seismic data is the known one?
  // determine index
  j = -1;
  for(i=0; i<data.n; i++) {
    if(data.num[i] == l) {
      j = i;
      break;
      }
    }
  // if can't find, die
  if(j == -1) {
    fprintf(stderr, "readKnownPick: no geophone matching number %ld in seismic data; shot #%ld.\n", l, shot);
    exit(1);
    }
  k.t = data.data[j];
  k.x = data.x[j];
  k.y = data.y[j];

  k.length = p.WaveTime;

  // no wavelet yet; keep the fields defined until one is computed
  k.w.n = 0;
  k.w.time = k.w.amp = NULL;

  return(k);
}


/* Get the next data record
 *
 * Calls getrecord() until it hands back a line, then copies the line
 * into record and strips leading/trailing whitespace.  char *record
 * should hold at least 1024 characters; longer lines are truncated to
 * 1023 characters.
 *
 * returns 1 when a record was stored, 0 at end of file or on a read
 * error
 */
int nextRecord(FILE *fp, char *record)
{
  char *line;

  // stop on a stream error too: feof() alone never becomes true then,
  // and the loop would spin forever
  do {
    line = getrecord(fp);
    } while(line == NULL && !feof(fp) && !ferror(fp));
  if(line == NULL || feof(fp)) {
    return(0);
    }
  // strncpy does not terminate when the source fills the buffer
  strncpy(record, line, 1023);
  record[1023] = '\0';
  strstrip(record);
  return(1);
}


/* Parse a command file for algorithm parameters
 *
 * name is the filename; if not openable, exit to system
 * returns autopickParameters struct filled with values
 * check returned struct for legal values!
 *
 * Lines have the form "token = value"; tokens are matched without
 * regard to case: threshhold, ampconstant, timeconstant, weights (three
 * numbers), minv, maxv, v, alpha, wavetime.  Lines without "=" and
 * unknown tokens are ignored.  Parameters that never appear keep the
 * value -1 (alpha: 1.0), which is what the caller should check for.
 */
autopickParameters grokCommandFile(char *name)
{
  autopickParameters p;
  FILE *fp;
  char line[1024];
  char *ptr, *token, *value;

  p.Threshhold = -1.0;
  p.AmpConst = -1.0;
  p.TimeConst = -1.0;
  p.Mw[0] = p.Mw[1] = p.Mw[2] = -1.0;
  p.MinVelocity = p.MaxVelocity = -1.0;
  p.Velocity = -1.0;
  p.Attenuation = 1.0;	// default
  p.WaveTime = -1.0;

  if((fp = fopen(name, "rt")) == NULL) {
    fprintf(stderr, "grokCommandFile: cannot open command file %s. Abort.\n", name);
    exit(1);
    }

  // a read error never sets EOF, so test for it as well
  while(!feof(fp) && !ferror(fp)) {
    // get next non-comment line
    if((ptr = getrecord(fp)) == NULL) continue;
    // bounded copy: the record may be longer than our buffer
    strncpy(line, ptr, sizeof(line)-1);
    line[sizeof(line)-1] = '\0';

    // break line on the first "=": token before it, value after it.
    // A line without "=" has no value at all and is skipped.
    if((ptr = strchr(line, '=')) == NULL) {
      continue; // not a valid line
      }
    *ptr = '\0';
    token = line;
    value = ptr+1; // rest of line
    strstrip(token);
    strstrip(value);

    // select on token, setting values in structure
    if(!strcasecmp(token, "threshhold")) { p.Threshhold = atof(value); }
    else if(!strcasecmp(token, "ampconstant")) { p.AmpConst = atof(value);  }
    else if(!strcasecmp(token, "timeconstant")) { p.TimeConst = atof(value); }
    else if(!strcasecmp(token, "weights")) {
      // parse the three weights
      sscanf(value, "%lf %lf %lf", &p.Mw[0], &p.Mw[1], &p.Mw[2]);
      }
    else if(!strcasecmp(token, "minv")) { p.MinVelocity = atof(value); }
    else if(!strcasecmp(token, "maxv")) { p.MaxVelocity = atof(value); }
    else if(!strcasecmp(token, "v")) { p.Velocity = atof(value); }
    else if(!strcasecmp(token, "alpha")) { p.Attenuation = atof(value); }
    else if(!strcasecmp(token, "wavetime")) { p.WaveTime = atof(value); }
    }
  fclose(fp);
  return(p);
}


/* true for the characters strstrip() removes: space, tab, CR, LF */
static int stripSpace(char c)
{
  return(c == ' ' || c == '\t' || c == '\r' || c == '\n');
}

/* remove leading and trailing whitespace from s
 *
 * Works in place, for strings of any length; whitespace inside the
 * string is kept.  An empty or all-whitespace string becomes "".
 * A NULL pointer is ignored.
 */
void strstrip(char *s)
{
  char *head, *tail;
  size_t len;

  if(s == NULL) return;

  // first character to keep (stops at the terminator at the latest)
  head = s;
  while(stripSpace(*head)) head++;

  // one past the last character to keep; never moves in front of head
  tail = head + strlen(head);
  while(tail > head && stripSpace(tail[-1])) tail--;

  len = (size_t)(tail - head);
  if(head != s) memmove(s, head, len); // source and target overlap
  s[len] = '\0';
}

