Module praatio.utilities.myMath
Various math utilities
Expand source code
'''
Various math utilities
'''
import math
def filterTimeSeriesData(filterFunc, featureTimeList, windowSize, index,
                         useEdgePadding):
    '''
    Filter one column of time-stamped data values within a window

    The values found at position /index/ of every row are collected,
    passed through filterFunc(values, windowSize, useEdgePadding), and
    written back to the same position.  It's ok to have other values in
    the rows; they are left untouched. eg
    featureTimeList: [(time_0, .., featureA_0, ..),
                      (time_1, .., featureA_1, ..),
                      ..]

    filterFunc can be medianFilter() or any other callable that accepts
    those three arguments and returns one output value per input value.
    znormWindowFilter() takes a fourth argument (filterZeroValues), so
    that argument has to be bound before it can be used here.

    /index/ may be negative (counted from the end of each row).

    Returns a new list of lists; the input rows are not modified.
    Raises an AssertionError if filterFunc does not return exactly one
    value per row.
    '''
    rowList = [list(row) for row in featureTimeList]
    featValues = [row[index] for row in rowList]
    featValues = filterFunc(featValues, windowSize, useEdgePadding)

    # Raised explicitly (rather than via an assert statement) so that the
    # check is still performed when python is run with -O; otherwise
    # zip() below would silently drop rows
    if len(rowList) != len(featValues):
        raise AssertionError("filterFunc returned %d values for %d rows"
                             % (len(featValues), len(rowList)))

    # Assigning into the copied row handles negative indices correctly;
    # splicing with row[:index] + [val] + row[index + 1:] does not
    # (for index == -1 the tail slice is the entire row)
    for row, val in zip(rowList, featValues):
        row[index] = val

    return rowList
def znormalizeSpeakerData(featureTimeList, index, filterZeroValues):
    '''
    z-normalize one column of time series data

    The idea is to normalize each speaker separately to be able
    to compare data across several speakers for speaker-dependent
    data like pitch range

    To normalize a speaker's data within a local window, use
    filterTimeSeriesData()

    index: the position, within each row, of the value to normalize
           (may be negative)
    filterZeroValues: if True, only values greater than zero are used
                      to compute the mean and stdDev, and only those
                      values are normalized; every other value is
                      output as 0
                      (recommended value for data like pitch or intensity)

    Returns a new list of lists; the input rows are not modified.
    Raises a ZeroDivisionError if fewer than two values are available
    for the statistics or if their standard deviation is zero.
    '''
    rowList = [list(row) for row in featureTimeList]
    featValues = [row[index] for row in rowList]

    if not filterZeroValues:
        featValues = znormalizeData(featValues)
    else:
        # Use the same test (val > 0) for choosing which values feed the
        # statistics as for choosing which values get normalized below.
        # Comparing against '' would leave numeric zeroes in the
        # statistics and skew the mean and stdDev.
        positiveValues = [val for val in featValues if val > 0]
        meanVal = mean(positiveValues)
        stdDevVal = stdDev(positiveValues)

        featValues = [(val - meanVal) / stdDevVal if val > 0 else 0
                      for val in featValues]

    # Assigning into the copied row (instead of splicing slices
    # together) keeps negative indices working
    for row, val in zip(rowList, featValues):
        row[index] = val

    return rowList
def medianFilter(dist, window, useEdgePadding):
    '''
    Replace each value in a dataset with the median of its neighbourhood

    Every value is swapped for the median of the window centered on it.
    This "smooths" out extreme values, which helps when the data has
    lots of quick spikes.  The larger the window, the flatter the
    output becomes.

    useEdgePadding controls what happens near the ends of the data,
    where the window does not fit: if True the window is padded by
    repeating the first/last value, if False those values are returned
    unchanged.

    Given:
    x = [1 1 1 9 5 2 4 7 4 5 1 5]
    medianFilter(x, 5, False)
    >> [1 1 1 2 4 5 4 4 4 5 1 5]
    '''
    smoothedList = _stepFilter(median, dist, window, useEdgePadding)
    return smoothedList
def znormWindowFilter(dst, window, useEdgePadding, filterZeroValues):
    '''
    z-normalize each value in a dataset; normalization occurs within a given window

    If you suspect that events are sensitive to local changes, (e.g. local
    changes in pitch are more important than absolute differences in pitch)
    then using windowed znormalization is appropriate.

    See znormalizeData() for more information on znormalization.

    useEdgePadding: if True, windows that run past either end of the data
                    are padded by repeating the first/last value; if
                    False, values whose window does not fit are returned
                    as they are (not normalized)
    filterZeroValues: if True, values that are not greater than zero are
                      removed before windowing (so a window spans the
                      neighbouring positive values) and are output as 0.0

    Returns a list with one entry per input value.
    Raises a ZeroDivisionError if the values within a window have a
    standard deviation of zero.
    '''

    def znormalizeCenterVal(valList):
        # The windows built by _stepFilter() have an odd length, so the
        # middle entry is the value the window is centered on
        valToNorm = valList[int(len(valList) / 2.0)]
        return (valToNorm - mean(valList)) / stdDev(valList)

    if not filterZeroValues:
        # The input parameter is named /dst/; referring to it by any other
        # name here raises a NameError
        filteredOutput = _stepFilter(znormalizeCenterVal, dst, window,
                                     useEdgePadding)
    else:
        zeroIndexList = []
        nonzeroValList = []
        for i, val in enumerate(dst):
            if val > 0.0:
                nonzeroValList.append(val)
            else:
                zeroIndexList.append(i)

        filteredOutput = _stepFilter(znormalizeCenterVal, nonzeroValList,
                                     window, useEdgePadding)

        # The indices were collected in ascending order, so inserting them
        # in that same order puts each 0.0 back at its original position
        for i in zeroIndexList:
            filteredOutput.insert(i, 0.0)

    return filteredOutput
def _stepFilter(filterFunc, dist, window, useEdgePadding):
offset = int(math.floor(window / 2.0))
length = len(dist)
returnList = []
for x in range(length):
dataToFilter = []
# If using edge padding or if 0 <= context <= length
if useEdgePadding or (((0 <= x - offset) and (x + offset < length))):
preContext = []
currentContext = [dist[x], ]
postContext = []
lastKnownLargeIndex = 0
for y in range(1, offset + 1): # 1-based
if x + y >= length:
if lastKnownLargeIndex == 0:
largeIndexValue = x
else:
largeIndexValue = lastKnownLargeIndex
else:
largeIndexValue = x + y
lastKnownLargeIndex = x + y
postContext.append(dist[largeIndexValue])
if x - y < 0:
smallIndexValue = 0
else:
smallIndexValue = x - y
preContext.insert(0, dist[smallIndexValue])
dataToFilter = preContext + currentContext + postContext
value = filterFunc(dataToFilter)
else:
value = dist[x]
returnList.append(value)
return returnList
def median(valList):
    '''
    Return the median of a list of numbers

    For an odd number of values the middle value (after sorting) is
    returned as is; for an even number of values the result is the
    average of the two middle values, as a float.

    The input list is not modified.
    '''
    ordered = valList[:]
    ordered.sort()

    middle = int(len(ordered) / 2.0)
    if len(ordered) % 2 == 1:  # Odd
        return ordered[middle]

    # Even
    return (ordered[middle - 1] + ordered[middle]) / 2.0
def mean(valList):
    '''
    Return the arithmetic mean of the values in valList

    Raises a ZeroDivisionError if valList is empty.
    '''
    total = sum(valList)
    count = float(len(valList))
    return total / count
def stdDev(valList):
    '''
    Return the sample standard deviation of the values in valList

    The sum of squared deviations from the mean is divided by
    (number of values - 1) before the square root is taken.

    Raises a ZeroDivisionError if valList has fewer than two values.
    '''
    avg = mean(valList)
    deviations = [val - avg for val in valList]
    sumOfSquares = sum(dev ** 2 for dev in deviations)
    degreesOfFreedom = float(len(valList) - 1)
    return math.sqrt(sumOfSquares / degreesOfFreedom)
def znormalizeData(valList):
    '''
    Given a list of floats, return the z-normalized values of the floats

    The formula is: z(v) = (v - mean) / stdDev

    Each output value says how many standard deviations the input value
    lies above (positive) or below (negative) the mean of the list.

    It can be used, for example, to compare the pitch values of different
    speakers who naturally have different pitch ranges.

    The input list is not modified.
    '''
    valCopy = valList[:]
    center = mean(valCopy)
    spread = stdDev(valCopy)

    normalizedList = []
    for val in valCopy:
        normalizedList.append((val - center) / spread)

    return normalizedList
def rms(intensityValues):
    '''
    Return the root mean square for the input set of values

    The values are squared, averaged, and the square root of that
    average is returned.

    Raises a ZeroDivisionError if intensityValues is empty.
    '''
    squaredSum = sum(val ** 2 for val in intensityValues)

    # Divide by a float, as mean() and stdDev() do, so that integer input
    # is never subject to integer division
    meanSquare = squaredSum / float(len(intensityValues))

    return math.sqrt(meanSquare)
Functions
def filterTimeSeriesData(filterFunc, featureTimeList, windowSize, index, useEdgePadding)
-
Filter time-stamped data values within a window
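filterFunc could be medianFilter(), or znormWindowFilter() once its extra filterZeroValues argument has been bound (filterFunc is called with three arguments: the values, windowSize and useEdgePadding)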
It's ok to have other values in the list. eg featureTimeList: [(time_0, .., featureA_0, ..), (time_1, .., featureA_1, ..), ..]
Expand source code
def filterTimeSeriesData(filterFunc, featureTimeList, windowSize, index,
                         useEdgePadding):
    '''
    Filter one column of time-stamped data values within a window

    The values found at position /index/ of every row are collected,
    passed through filterFunc(values, windowSize, useEdgePadding), and
    written back to the same position.  It's ok to have other values in
    the rows; they are left untouched. eg
    featureTimeList: [(time_0, .., featureA_0, ..),
                      (time_1, .., featureA_1, ..),
                      ..]

    filterFunc can be medianFilter() or any other callable that accepts
    those three arguments and returns one output value per input value.
    znormWindowFilter() takes a fourth argument (filterZeroValues), so
    that argument has to be bound before it can be used here.

    /index/ may be negative (counted from the end of each row).

    Returns a new list of lists; the input rows are not modified.
    Raises an AssertionError if filterFunc does not return exactly one
    value per row.
    '''
    rowList = [list(row) for row in featureTimeList]
    featValues = [row[index] for row in rowList]
    featValues = filterFunc(featValues, windowSize, useEdgePadding)

    # Raised explicitly (rather than via an assert statement) so that the
    # check is still performed when python is run with -O; otherwise
    # zip() below would silently drop rows
    if len(rowList) != len(featValues):
        raise AssertionError("filterFunc returned %d values for %d rows"
                             % (len(featValues), len(rowList)))

    # Assigning into the copied row handles negative indices correctly;
    # splicing with row[:index] + [val] + row[index + 1:] does not
    # (for index == -1 the tail slice is the entire row)
    for row, val in zip(rowList, featValues):
        row[index] = val

    return rowList
def mean(valList)
-
Expand source code
def mean(valList):
    '''
    Return the arithmetic mean of the values in valList

    Raises a ZeroDivisionError if valList is empty.
    '''
    total = sum(valList)
    count = float(len(valList))
    return total / count
def median(valList)
-
Expand source code
def median(valList):
    '''
    Return the median of a list of numbers

    For an odd number of values the middle value (after sorting) is
    returned as is; for an even number of values the result is the
    average of the two middle values, as a float.

    The input list is not modified.
    '''
    ordered = valList[:]
    ordered.sort()

    middle = int(len(ordered) / 2.0)
    if len(ordered) % 2 == 1:  # Odd
        return ordered[middle]

    # Even
    return (ordered[middle - 1] + ordered[middle]) / 2.0
def medianFilter(dist, window, useEdgePadding)
-
median filter each value in a dataset; filtering occurs within a given window
Median filtering is used to "smooth" out extreme values. It can be useful if your data has lots of quick spikes. The larger the window, the flatter the output becomes. Given: x = [1 1 1 9 5 2 4 7 4 5 1 5] medianFilter(x, 5, False)
[1 1 1 2 4 5 4 4 4 5 1 5]
Expand source code
def medianFilter(dist, window, useEdgePadding):
    '''
    Replace each value in a dataset with the median of its neighbourhood

    Every value is swapped for the median of the window centered on it.
    This "smooths" out extreme values, which helps when the data has
    lots of quick spikes.  The larger the window, the flatter the
    output becomes.

    useEdgePadding controls what happens near the ends of the data,
    where the window does not fit: if True the window is padded by
    repeating the first/last value, if False those values are returned
    unchanged.

    Given:
    x = [1 1 1 9 5 2 4 7 4 5 1 5]
    medianFilter(x, 5, False)
    >> [1 1 1 2 4 5 4 4 4 5 1 5]
    '''
    smoothedList = _stepFilter(median, dist, window, useEdgePadding)
    return smoothedList
def rms(intensityValues)
-
Return the root mean square for the input set of values
Expand source code
def rms(intensityValues):
    '''
    Return the root mean square for the input set of values

    The values are squared, averaged, and the square root of that
    average is returned.

    Raises a ZeroDivisionError if intensityValues is empty.
    '''
    squaredSum = sum(val ** 2 for val in intensityValues)

    # Divide by a float, as mean() and stdDev() do, so that integer input
    # is never subject to integer division
    meanSquare = squaredSum / float(len(intensityValues))

    return math.sqrt(meanSquare)
def stdDev(valList)
-
Expand source code
def stdDev(valList):
    '''
    Return the sample standard deviation of the values in valList

    The sum of squared deviations from the mean is divided by
    (number of values - 1) before the square root is taken.

    Raises a ZeroDivisionError if valList has fewer than two values.
    '''
    avg = mean(valList)
    deviations = [val - avg for val in valList]
    sumOfSquares = sum(dev ** 2 for dev in deviations)
    degreesOfFreedom = float(len(valList) - 1)
    return math.sqrt(sumOfSquares / degreesOfFreedom)
def znormWindowFilter(dst, window, useEdgePadding, filterZeroValues)
-
z-normalize each value in a dataset; normalization occurs within a given window
If you suspect that events are sensitive to local changes (e.g. local changes in pitch are more important than absolute differences in pitch), then using windowed znormalization is appropriate.
See znormalizeData() for more information on znormalization.
Expand source code
def znormWindowFilter(dst, window, useEdgePadding, filterZeroValues):
    '''
    z-normalize each value in a dataset; normalization occurs within a given window

    If you suspect that events are sensitive to local changes, (e.g. local
    changes in pitch are more important than absolute differences in pitch)
    then using windowed znormalization is appropriate.

    See znormalizeData() for more information on znormalization.

    useEdgePadding: if True, windows that run past either end of the data
                    are padded by repeating the first/last value; if
                    False, values whose window does not fit are returned
                    as they are (not normalized)
    filterZeroValues: if True, values that are not greater than zero are
                      removed before windowing (so a window spans the
                      neighbouring positive values) and are output as 0.0

    Returns a list with one entry per input value.
    Raises a ZeroDivisionError if the values within a window have a
    standard deviation of zero.
    '''

    def znormalizeCenterVal(valList):
        # The windows built by _stepFilter() have an odd length, so the
        # middle entry is the value the window is centered on
        valToNorm = valList[int(len(valList) / 2.0)]
        return (valToNorm - mean(valList)) / stdDev(valList)

    if not filterZeroValues:
        # The input parameter is named /dst/; referring to it by any other
        # name here raises a NameError
        filteredOutput = _stepFilter(znormalizeCenterVal, dst, window,
                                     useEdgePadding)
    else:
        zeroIndexList = []
        nonzeroValList = []
        for i, val in enumerate(dst):
            if val > 0.0:
                nonzeroValList.append(val)
            else:
                zeroIndexList.append(i)

        filteredOutput = _stepFilter(znormalizeCenterVal, nonzeroValList,
                                     window, useEdgePadding)

        # The indices were collected in ascending order, so inserting them
        # in that same order puts each 0.0 back at its original position
        for i in zeroIndexList:
            filteredOutput.insert(i, 0.0)

    return filteredOutput
def znormalizeData(valList)
-
Given a list of floats, return the z-normalized values of the floats
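The formula is: z(v) = (v - mean) / stdDev. In effect, this rescales the values so that they have a mean of 0 and a (sample) standard deviation of 1; each result says how many standard deviations the original value lies from the mean, and is not limited to a fixed range. It can be used, for example, to compare the pitch values of different speakers who naturally have different pitch ranges.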
Expand source code
def znormalizeData(valList):
    '''
    Given a list of floats, return the z-normalized values of the floats

    The formula is: z(v) = (v - mean) / stdDev

    Each output value says how many standard deviations the input value
    lies above (positive) or below (negative) the mean of the list.

    It can be used, for example, to compare the pitch values of different
    speakers who naturally have different pitch ranges.

    The input list is not modified.
    '''
    valCopy = valList[:]
    center = mean(valCopy)
    spread = stdDev(valCopy)

    normalizedList = []
    for val in valCopy:
        normalizedList.append((val - center) / spread)

    return normalizedList
def znormalizeSpeakerData(featureTimeList, index, filterZeroValues)
-
znormalize time series data
The idea is to normalize each speaker separately to be able to compare data across several speakers for speaker-dependent data like pitch range
To normalize a speaker's data within a local window, use filterTimeSeriesData()
filterZeroValues: if True, don't consider zero values in the mean and stdDev (recommended value for data like pitch or intensity)
Expand source code
def znormalizeSpeakerData(featureTimeList, index, filterZeroValues):
    '''
    z-normalize one column of time series data

    The idea is to normalize each speaker separately to be able
    to compare data across several speakers for speaker-dependent
    data like pitch range

    To normalize a speaker's data within a local window, use
    filterTimeSeriesData()

    index: the position, within each row, of the value to normalize
           (may be negative)
    filterZeroValues: if True, only values greater than zero are used
                      to compute the mean and stdDev, and only those
                      values are normalized; every other value is
                      output as 0
                      (recommended value for data like pitch or intensity)

    Returns a new list of lists; the input rows are not modified.
    Raises a ZeroDivisionError if fewer than two values are available
    for the statistics or if their standard deviation is zero.
    '''
    rowList = [list(row) for row in featureTimeList]
    featValues = [row[index] for row in rowList]

    if not filterZeroValues:
        featValues = znormalizeData(featValues)
    else:
        # Use the same test (val > 0) for choosing which values feed the
        # statistics as for choosing which values get normalized below.
        # Comparing against '' would leave numeric zeroes in the
        # statistics and skew the mean and stdDev.
        positiveValues = [val for val in featValues if val > 0]
        meanVal = mean(positiveValues)
        stdDevVal = stdDev(positiveValues)

        featValues = [(val - meanVal) / stdDevVal if val > 0 else 0
                      for val in featValues]

    # Assigning into the copied row (instead of splicing slices
    # together) keeps negative indices working
    for row, val in zip(rowList, featValues):
        row[index] = val

    return rowList