PonyPlayer
sonic.h
浏览该文件的文档.
1/* Sonic library
2 Copyright 2010
3 Bill Cox
4 This file is part of the Sonic Library.
5
6 This file is licensed under the Apache 2.0 license.
7*/
8
9/*
10The Sonic Library implements a new algorithm invented by Bill Cox for the
11specific purpose of speeding up speech by high factors at high quality. It
12generates smooth speech at speed up factors as high as 6X, possibly more. It is
13also capable of slowing down speech, and generates high quality results
14regardless of the speed up or slow down factor. For speeding up speech by 2X or
15more, the following equation is used:
16
17 newSamples = period/(speed - 1.0)
18 scale = 1.0/newSamples;
19
20where period is the current pitch period, determined using AMDF or any other
21pitch estimator, and speed is the speedup factor. If the current position in
22the input stream is pointed to by "samples", and the current output stream
23position is pointed to by "out", then newSamples number of samples can be
24generated with:
25
26 out[t] = (samples[t]*(newSamples - t) + samples[t + period]*t)/newSamples;
27
28where t = 0 to newSamples - 1.
29
30For speed factors < 2X, the PICOLA algorithm is used. The above
31algorithm is first used to double the speed of one pitch period. Then, enough
32input is directly copied from the input to the output to achieve the desired
33speed up factor, where 1.0 < speed < 2.0. The amount of data copied is derived:
34
35 speed = (2*period + length)/(period + length)
36 speed*length + speed*period = 2*period + length
37 length(speed - 1) = 2*period - speed*period
38 length = period*(2 - speed)/(speed - 1)
39
40For slowing down speech where 0.5 < speed < 1.0, a pitch period is inserted into
41the output twice, and length samples of input are copied from the input to the
42output until the desired output speed is reached. The length of data copied is:
43
44 length = period*(speed - 0.5)/(1 - speed)
45
46For slow down factors below 0.5, no data is copied, and an algorithm
47similar to high speed factors is used.
48*/
49
50/* Uncomment this to use sine-wave based overlap-add, which in theory can improve
51 sound quality slightly, at the expense of lots of floating point math. */
52/* #define SONIC_USE_SIN */
53
54#ifdef __cplusplus
55extern "C" {
56#endif
57
58/* This specifies the range of voice pitches we try to match.
59 Note that if we go lower than 65, we could overflow in findPitchInRange */
60#ifndef SONIC_MIN_PITCH
61#define SONIC_MIN_PITCH 65
62#endif /* SONIC_MIN_PITCH */
63#ifndef SONIC_MAX_PITCH
64#define SONIC_MAX_PITCH 400
65#endif /* SONIC_MAX_PITCH */
66
67/* These are used to down-sample some inputs to improve speed */
68#define SONIC_AMDF_FREQ 4000
69
72
73/* For all of the following functions, numChannels is multiplied by numSamples
74 to determine the actual number of values read or returned. */
75
76/* Create a sonic stream. Return NULL only if we are out of memory and cannot
77 allocate the stream. Set numChannels to 1 for mono, and 2 for stereo. */
79/* Destroy the sonic stream. */
81/* Use this to write floating point data to be sped up or slowed down into the
82 stream. Values must be between -1 and 1. Returns 0 if memory realloc failed,
83 otherwise 1. */
84int sonicWriteFloatToStream(sonicStream stream, float* samples, int numSamples);
85/* Use this to write 16-bit data to be sped up or slowed down into the stream.
86 Returns 0 if memory realloc failed, otherwise 1. */
87int sonicWriteShortToStream(sonicStream stream, const short* samples, int numSamples);
88/* Use this to write 8-bit unsigned data to be sped up or slowed down into the
89 stream. Returns 0 if memory realloc failed, otherwise 1. */
90int sonicWriteUnsignedCharToStream(sonicStream stream, unsigned char* samples,
91 int numSamples);
92/* Use this to read floating point data out of the stream. Sometimes no data
93 will be available, and zero is returned, which is not an error condition. */
94int sonicReadFloatFromStream(sonicStream stream, float* samples,
95 int maxSamples);
96/* Use this to read 16-bit data out of the stream. Sometimes no data will
97 be available, and zero is returned, which is not an error condition. */
98int sonicReadShortFromStream(sonicStream stream, short* samples,
99 int maxSamples);
100/* Use this to read 8-bit unsigned data out of the stream. Sometimes no data
101 will be available, and zero is returned, which is not an error condition. */
102int sonicReadUnsignedCharFromStream(sonicStream stream, unsigned char* samples,
103 int maxSamples);
104/* Force the sonic stream to generate output using whatever data it currently
105 has. No extra delay will be added to the output, but flushing in the middle
106 of words could introduce distortion. */
107int sonicFlushStream(sonicStream stream);
108/* Return the number of samples in the output buffer */
110/* Get the speed of the stream. */
111float sonicGetSpeed(sonicStream stream);
112/* Set the speed of the stream. */
113void sonicSetSpeed(sonicStream stream, float speed);
114/* Get the pitch of the stream. */
115float sonicGetPitch(sonicStream stream);
116/* Set the pitch of the stream. */
117void sonicSetPitch(sonicStream stream, float pitch);
118/* Get the rate of the stream. */
119float sonicGetRate(sonicStream stream);
120/* Set the rate of the stream. */
121void sonicSetRate(sonicStream stream, float rate);
122/* Get the scaling factor of the stream. */
123float sonicGetVolume(sonicStream stream);
124/* Set the scaling factor of the stream. */
125void sonicSetVolume(sonicStream stream, float volume);
126/* Get the chord pitch setting. */
128/* Set chord pitch mode on or off. Default is off. See the documentation
129 page for a description of this feature. */
131/* Get the quality setting. */
132int sonicGetQuality(sonicStream stream);
133/* Set the "quality". Default 0 is virtually as good as 1, but very much
134 * faster. */
135void sonicSetQuality(sonicStream stream, int quality);
136/* Get the sample rate of the stream. */
138/* Set the sample rate of the stream. This will drop any samples that have not
139 * been read. */
141/* Get the number of channels. */
143/* Set the number of channels. This will drop any samples that have not been
144 * read. */
146/* This is a non-stream oriented interface to just change the speed of a sound
147 sample. It works in-place on the sample array, so there must be at least
148 speed*numSamples available space in the array. Returns the new number of
149 samples. */
150int sonicChangeFloatSpeed(float* samples, int numSamples, float speed,
151 float pitch, float rate, float volume,
152 int useChordPitch, int sampleRate, int numChannels);
153/* This is a non-stream oriented interface to just change the speed of a sound
154 sample. It works in-place on the sample array, so there must be at least
155 speed*numSamples available space in the array. Returns the new number of
156 samples. */
157int sonicChangeShortSpeed(short* samples, int numSamples, float speed,
158 float pitch, float rate, float volume,
159 int useChordPitch, int sampleRate, int numChannels);
160
161#ifdef SONIC_SPECTROGRAM
162/*
163This code generates high quality spectrograms from sound samples, using
164Time-Aliased-FFTs as described at:
165
166 https://github.com/waywardgeek/spectrogram
167
168Basically, two adjacent pitch periods are overlap-added to create a sound
169sample that accurately represents the speech sound at that moment in time.
170This set of samples is converted to a spectral line using an FFT, and the result
171is saved as a single spectral line at that moment in time. The resulting
172spectral lines vary in resolution (it is equal to the number of samples in the
173pitch period), and the spacing of spectral lines also varies (proportional to
174the number of samples in the pitch period).
175
176To generate a bitmap, linear interpolation is used to render the grayscale
177value at any particular point in time and frequency.
178*/
179
180#define SONIC_MAX_SPECTRUM_FREQ 5000
181
182struct sonicSpectrogramStruct;
183struct sonicBitmapStruct;
184typedef struct sonicSpectrogramStruct* sonicSpectrogram;
185typedef struct sonicBitmapStruct* sonicBitmap;
186
187/* sonicBitmap objects represent spectrograms as grayscale bitmaps where each
188 pixel is from 0 (black) to 255 (white). Bitmaps are rows*cols in size.
189 Rows are indexed top to bottom and columns are indexed left to right */
190struct sonicBitmapStruct {
191 unsigned char* data;
192 int numRows;
193 int numCols;
194};
195
196typedef struct sonicBitmapStruct* sonicBitmap;
197
198/* Enable computation of a spectrogram on the fly. */
199void sonicComputeSpectrogram(sonicStream stream);
200
201/* Get the spectrogram. */
202sonicSpectrogram sonicGetSpectrogram(sonicStream stream);
203
204/* Create an empty spectrogram. Called automatically if sonicComputeSpectrogram
205 has been called. */
206sonicSpectrogram sonicCreateSpectrogram(int sampleRate);
207
208/* Destroy the spectrogram. This is called automatically when calling
209 sonicDestroyStream. */
210void sonicDestroySpectrogram(sonicSpectrogram spectrogram);
211
212/* Convert the spectrogram to a bitmap. Caller must destroy bitmap when done. */
213sonicBitmap sonicConvertSpectrogramToBitmap(sonicSpectrogram spectrogram,
214 int numRows, int numCols);
215
216/* Destroy a bitmap returned by sonicConvertSpectrogramToBitmap. */
217void sonicDestroyBitmap(sonicBitmap bitmap);
218
219int sonicWritePGM(sonicBitmap bitmap, char* fileName);
220
221/* Add two pitch periods worth of samples to the spectrogram. There must be
222 2*period samples. Time should advance one pitch period for each call to
223 this function. */
224void sonicAddPitchPeriodToSpectrogram(sonicSpectrogram spectrogram,
225 short* samples, int period,
226 int numChannels);
227#endif /* SONIC_SPECTROGRAM */
228
229#ifdef __cplusplus
230}
231#endif
void sonicSetSampleRate(sonicStream stream, int sampleRate)
Definition: sonic.c:421
void sonicSetRate(sonicStream stream, float rate)
Definition: sonic.c:274
void sonicSetPitch(sonicStream stream, float pitch)
Definition: sonic.c:267
int sonicChangeFloatSpeed(float *samples, int numSamples, float speed, float pitch, float rate, float volume, int useChordPitch, int sampleRate, int numChannels)
Definition: sonic.c:1230
int sonicFlushStream(sonicStream stream)
Definition: sonic.c:656
float sonicGetRate(sonicStream stream)
Definition: sonic.c:270
float sonicGetVolume(sonicStream stream)
Definition: sonic.c:299
sonicStream sonicCreateStream(int sampleRate, int numChannels)
Definition: sonic.c:395
int sonicGetQuality(sonicStream stream)
Definition: sonic.c:290
void sonicSetVolume(sonicStream stream, float volume)
Definition: sonic.c:302
int sonicSamplesAvailable(sonicStream stream)
Definition: sonic.c:687
int sonicWriteShortToStream(sonicStream stream, const short *samples, int numSamples)
Definition: sonic.c:1210
void sonicDestroyStream(sonicStream stream)
Definition: sonic.c:323
void sonicSetQuality(sonicStream stream, int quality)
Definition: sonic.c:294
void sonicSetChordPitch(sonicStream stream, int useChordPitch)
Definition: sonic.c:285
int sonicReadShortFromStream(sonicStream stream, short *samples, int maxSamples)
Definition: sonic.c:600
int sonicGetNumChannels(sonicStream stream)
Definition: sonic.c:427
int sonicGetChordPitch(sonicStream stream)
Definition: sonic.c:282
float sonicGetSpeed(sonicStream stream)
Definition: sonic.c:258
struct sonicStreamStruct * sonicStream
Definition: sonic.h:71
void sonicSetSpeed(sonicStream stream, float speed)
Definition: sonic.c:261
int sonicWriteUnsignedCharToStream(sonicStream stream, unsigned char *samples, int numSamples)
Definition: sonic.c:1220
int sonicChangeShortSpeed(short *samples, int numSamples, float speed, float pitch, float rate, float volume, int useChordPitch, int sampleRate, int numChannels)
Definition: sonic.c:1250
float sonicGetPitch(sonicStream stream)
Definition: sonic.c:264
int sonicReadFloatFromStream(sonicStream stream, float *samples, int maxSamples)
Definition: sonic.c:570
int sonicWriteFloatToStream(sonicStream stream, float *samples, int numSamples)
Definition: sonic.c:1200
int sonicReadUnsignedCharFromStream(sonicStream stream, unsigned char *samples, int maxSamples)
Definition: sonic.c:625
int sonicGetSampleRate(sonicStream stream)
Definition: sonic.c:417
void sonicSetNumChannels(sonicStream stream, int numChannels)
Definition: sonic.c:431
Definition: sonic.c:192
float pitch
Definition: sonic.c:202
int quality
Definition: sonic.c:207
int sampleRate
Definition: sonic.c:219
float volume
Definition: sonic.c:201
float speed
Definition: sonic.c:200
float rate
Definition: sonic.c:203
int numChannels
Definition: sonic.c:208
int useChordPitch
Definition: sonic.c:206