Generating wave sound from text file - python-2.7

I have generated sequence of frequency sound from text file using :
import mmap
import math
import pyaudio
# Read the whole input file as raw bytes; every byte value selects one tone.
fh = open('/home/jay/Documents/try.txt', 'rb')
m = mmap.mmap(fh.fileno(), 0, access=mmap.ACCESS_READ)
ba = bytearray(m)
#sudo apt-get install python-pyaudio
PyAudio = pyaudio.PyAudio
#See http://en.wikipedia.org/wiki/Bit_rate#Audio
BITRATE = 16000 #number of frames per second/frameset.
for freq in ba:
    #See http://www.phy.mtu.edu/~suits/notefreqs.html
    FREQUENCY = 300 + freq #Hz, waves per second, 261.63=C4-note.
    LENGTH = 1 #seconds to play sound
    NUMBEROFFRAMES = int(BITRATE * LENGTH)
    RESTFRAMES = NUMBEROFFRAMES % BITRATE
    # One unsigned 8-bit sample per frame, centred on 128.
    # NOTE(review): under Python 2, BITRATE/FREQUENCY is an integer division.
    tone = ''.join(
        chr(int(math.sin(x/((BITRATE/FREQUENCY)/math.pi))*127+128))
        for x in xrange(NUMBEROFFRAMES))
    #fill remainder of frameset with silence
    WAVEDATA = tone + chr(128) * RESTFRAMES
    # A fresh PyAudio instance and output stream are created, used and torn
    # down again for every single tone.
    p = PyAudio()
    stream = p.open(
        format=p.get_format_from_width(1),
        channels=1,
        rate=BITRATE,
        output=True)
    stream.write(WAVEDATA)
    stream.stop_stream()
    stream.close()
    p.terminate()
(try.txt can be any text file you want)
But there is some noise between the individual tones. How can I remove it, and how can I save the whole sequence of played frequencies to a .wav or .mp3 file?
Sorry i am still learning so if i am not clear in asking.
Thanks,
Jay

Biggest problem here is that you open/close audio stream at each frequency.
Instead, keep the stream open and you'll get a lot less "clicks". That doesn't mean you'll get no clicks, probably because processing time is sometimes too long and the stream is interrupted. The best way would be to multithread generation and write to be more reactive, but that's another story...
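I also added the ability to save the output to a .wav file. The input and output file names below are the ones from my machine; change them to your own. It works very well.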
Fixed code (runs with Python 3)
import mmap
import math
import pyaudio,wave
import array
# Play one tone per byte of the input file and save the same samples to a WAV.
#
# Raw strings are used for the Windows paths so that the backslashes are not
# parsed as (invalid) escape sequences; the resulting paths are unchanged.
fh = open(r'K:\out.txt', 'rb')
try:
    m = mmap.mmap(fh.fileno(), 0, access=mmap.ACCESS_READ)
    try:
        # bytearray() copies the mapped bytes, so the mapping can be closed.
        ba = bytearray(m)
    finally:
        m.close()
finally:
    fh.close()
#ba = [300,400,500,400,200]
#sudo apt-get install python-pyaudio
PyAudio = pyaudio.PyAudio
#See http://en.wikipedia.org/wiki/Bit_rate#Audio
BITRATE = 16000 #number of frames per second/frameset.
# These three values do not depend on the tone, so compute them once.
LENGTH = 1 #seconds to play sound
NUMBEROFFRAMES = int(BITRATE * LENGTH)
RESTFRAMES = NUMBEROFFRAMES % BITRATE

# Open the output stream once and keep it open for all tones.
p = PyAudio()
stream = p.open(format=p.get_format_from_width(1),
                channels=1,
                rate=BITRATE,
                output=True)
try:
    with wave.open(r"K:\wavout.wav", "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(p.get_sample_size(pyaudio.paInt8)) # byte = 8 bits else trashed
        wf.setframerate(BITRATE)
        for freq in ba:
            #See http://www.phy.mtu.edu/~suits/notefreqs.html
            FREQUENCY = 300 + freq #Hz, waves per second, 261.63=C4-note.
            print('freq '+str(FREQUENCY))
            # Unsigned 8-bit samples centred on 128.
            # NOTE(review): the sine argument advances by pi*FREQUENCY/BITRATE
            # per sample, i.e. one full cycle every 2*BITRATE/FREQUENCY
            # samples, so the audible pitch is FREQUENCY/2. Each tone also
            # restarts at phase 0, which can still click at tone boundaries.
            WAVEDATA = [
                int(math.sin(x/((BITRATE/FREQUENCY)/math.pi))*127+128)
                for x in range(NUMBEROFFRAMES)
            ]
            #fill remainder of frameset with silence
            WAVEDATA += [128]*RESTFRAMES
            # array.tostring() was removed in Python 3.9; tobytes() is the
            # supported spelling.
            b = array.array('B', WAVEDATA).tobytes()
            wf.writeframes(b)
            stream.write(b)
finally:
    # Release the audio device even if generation or writing fails.
    stream.stop_stream()
    stream.close()
    p.terminate()

Related

My neural network takes too much time to train one epoch

I am training a neural network to classify traffic signs, but a single epoch takes far too long to train (30+ minutes). I have set the batch size to 64 and the learning rate to 0.002; the input is 20x20 pixels with 3 channels, and the model summary shows 173,931 trainable parameters. Is that too many, or is it reasonable?
Here is the network architecture
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
class Network(nn.Module):
    """Small CNN classifier for 3-channel 20x20 images.

    Two conv + ReLU + 2x2 max-pool stages (20 -> 10 -> 5 pixels per side)
    feed two fully connected layers, with dropout applied before each of them.

    Args:
        num_classes: Number of output logits. Defaults to 43, the value that
            was previously hard-coded.
        dropout_p: Dropout probability. Defaults to 0.25, the value that was
            previously hard-coded.
    """

    def __init__(self, num_classes=43, dropout_p=0.25):
        super(Network,self).__init__()
        #Convolutional Layers
        self.conv1 = nn.Conv2d(3,16,3,padding=1)
        self.conv2 = nn.Conv2d(16,32,3,padding=1)
        #Max Pooling Layers
        self.pool = nn.MaxPool2d(2,2)
        #Linear Fully connected layers
        # 32 channels * 5 * 5 pixels remain after two poolings of a 20x20 input
        self.fc1 = nn.Linear(32*5*5,200)
        self.fc2 = nn.Linear(200,num_classes)
        #Dropout
        self.dropout = nn.Dropout(p=dropout_p)

    def forward(self,x):
        """Return raw class scores (no softmax) for a batch of images.

        Args:
            x: Tensor of shape (batch, 3, 20, 20).

        Returns:
            Tensor of shape (batch, num_classes).
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten the feature maps to one vector per sample.
        x = x.view(-1,32*5*5)
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x
Here is the optimizer instance
import torch.optim as optim
# Loss applied to the network's raw class scores and the integer targets.
criterion = nn.CrossEntropyLoss()
# Plain SGD over all model parameters.
# NOTE(review): this rebinds the name `optim` from the imported torch.optim
# module to the optimizer instance. The training code relies on that binding
# (optim.zero_grad() / optim.step()), but the module itself is no longer
# reachable through `optim` after this line.
optim = optim.SGD(model.parameters(),lr = 0.002)
Here is the training code
# Train for a fixed number of epochs, validating after each one and saving the
# weights whenever the validation loss improves.
epochs = 20
# Best validation loss so far; +inf makes the first epoch count as improvement.
valid_loss_min = float("inf")
print("Training the network")
for epoch in range(1, epochs + 1):
    train_loss = 0
    valid_loss = 0

    model.train()
    for data, target in train_data:
        if gpu_available:
            data, target = data.cuda(), target.cuda()
        optim.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optim.step()
        # The criterion returns a batch mean; weight it by the batch size so
        # the running total is a sum of per-sample losses.
        train_loss += loss.item() * data.size(0)

    #########################
    ###### Validate #########
    model.eval()
    # No gradients are needed for validation; skipping them saves time and memory.
    with torch.no_grad():
        for data, target in valid_data:
            if gpu_available:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            loss = criterion(output, target)
            valid_loss += loss.item() * data.size(0)

    # Convert the summed losses to per-sample averages.
    train_loss = train_loss / len(train_data.dataset)
    valid_loss = valid_loss / len(valid_data.dataset)
    print("Epoch {}.....Train Loss = {:.6f}....Valid Loss = {:.6f}".format(epoch,train_loss,valid_loss))
    if valid_loss < valid_loss_min:
        torch.save(model.state_dict(), 'model_traffic.pt')
        print("Valid Loss min {:.6f} >>> {:.6f}".format(valid_loss_min, valid_loss))
        # Remember the new best so later epochs only save on real improvement.
        valid_loss_min = valid_loss
I am using GPU through google colab

tensorflow error: InvalidArgumentError: Shape mismatch in tuple component 1. Expected [1], got [5]

I am trying to construct a batch of (wav_file, label) pair.
wav file labels and paths are listed in dev.csv.
below code is not working,
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import tensorflow as tf
FLAGS = tf.app.flags.FLAGS
threads = 1
batch_size = 5
# Column defaults for tf.decode_csv: wav path, duration, transcript (all strings).
record_defaults = [['/Users/phoenix/workspace/dataset/data_thchs30/dev/A11_101.wav'], ['8.26'], ['七十 年代 末 我 外出 求学 母亲 叮咛 我 吃饭 要 细嚼慢咽 学习 要 深 钻 细 研']]


def read_record(filename_queue, num_records):
    """Read up to `num_records` CSV rows and the wav files they point to.

    Args:
        filename_queue: Queue of CSV file names.
        num_records: Maximum number of rows / files to read per call.

    Returns:
        [wav_key, transcript]. This is the combination that fails in
        tf.train.batch_join with the shape-mismatch error being asked about.
    """
    reader = tf.TextLineReader()
    key, value = reader.read_up_to(filename_queue, num_records)
    wav_filename, duration, transcript = tf.decode_csv(value, record_defaults, field_delim=",")
    wav_reader = tf.WholeFileReader()
    wav_key, wav_value = wav_reader.read_up_to(tf.train.string_input_producer(wav_filename, shuffle=False, capacity=num_records), num_records)
    return [wav_key, transcript] # throw errors
    # return [wav_key, wav_value] # works
    # return [wav_filename, duration, transcript] # works


data_queue = tf.train.string_input_producer(tf.train.match_filenames_once('dev.csv'), shuffle=False)
batch_data = [read_record(data_queue, batch_size) for _ in range(threads)]
capacity = threads * batch_size
batch_values = tf.train.batch_join(batch_data, batch_size=batch_size, capacity=capacity, enqueue_many=True)
init = tf.initialize_all_variables()
with tf.Session() as sess:
    sess.run(init)
    sess.run(tf.initialize_local_variables())
    coord = tf.train.Coordinator()
    print(coord)
    # Kept under its own name so the integer `threads` above is not rebound.
    queue_threads = tf.train.start_queue_runners(coord=coord)
    print("threads num: " + str(queue_threads))
    try:
        step = 0
        while not coord.should_stop():
            step += 1
            feat = sess.run([batch_values])
            print("line:", step, feat)
    except tf.errors.OutOfRangeError:
        # Apply the % operator so the step count is actually formatted in.
        print(' training for 1 epochs, %d steps' % step)
    finally:
        coord.request_stop()
        coord.join(queue_threads)
throw errors below, how can I fix it?:
dev.csv content as below:
/Users/phoenix/workspace/dataset/data_thchs30/dev/A11_101.wav,8.26,qi shi nian dai mo wo wai chu qiu xue
/Users/phoenix/workspace/dataset/data_thchs30/dev/A11_119.wav,6.9,chen yun tong shi yao qiu gan bu men ren zhen xue xi
I tried to rewrite your code like this.
This is my observation.
The error is no longer thrown. And the values are returned.
An obvious discrepancy is that the size of the batch for transcript is double that specified. So it is 4 instead of 2. It doubles for some reason. No such problem for the audio binary.
shapes=[tf.TensorShape(()),tf.TensorShape(batch_size,)] is based on an error I saw which mentioned that I have to specify this using TensorShape. I didn't find the documentation of any help but it is mentioned there.
shapes: (Optional.) A list of fully-defined TensorShape objects with the same length as dtypes, or None.
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.DEBUG)
FLAGS = tf.app.flags.FLAGS
threads = 1
batch_size = 2
# Column defaults for tf.decode_csv: wav path, duration, transcript (all strings).
record_defaults = [['D:/male.wav'], ['8.26'], ['七十 年代 末 我 外出 求学 母亲 叮咛 我 吃饭 要 细嚼慢咽 学习 要 深 钻 细 研']]


def readbatch(data_queue) :
    """Read up to `batch_size` CSV rows plus the contents of one wav file.

    Args:
        data_queue: Queue of CSV file names.

    Returns:
        [audio, transcript]: `audio` comes from a single WholeFileReader.read
        call, while `transcript` holds one entry per CSV row read.
    """
    reader = tf.TextLineReader()
    _, rows = reader.read_up_to(data_queue, batch_size)
    wav_filename, duration, transcript = tf.decode_csv(rows, record_defaults,field_delim=",")
    audioreader = tf.WholeFileReader()
    _, audio = audioreader.read( tf.train.string_input_producer(wav_filename) )
    return [audio,transcript]


data_queue = tf.train.string_input_producer(
    tf.train.match_filenames_once('D:/Book1.csv'), shuffle=False)
batch_data = [readbatch(data_queue) for _ in range(threads)]
capacity = threads * batch_size
# NOTE(review): with enqueue_many=False every enqueued element already carries
# a transcript vector of length batch_size, and batch_join then stacks
# batch_size of those elements. That is presumably why the transcript comes
# back as (batch_size, batch_size) -- confirm against the TF queue docs.
batch_values = tf.train.batch_join(batch_data, shapes=[tf.TensorShape(()),tf.TensorShape(batch_size,)], capacity=capacity, batch_size=batch_size, enqueue_many=False )
init = tf.initialize_all_variables()
with tf.Session() as sess:
    sess.run(init)
    sess.run(tf.initialize_local_variables())
    coord = tf.train.Coordinator()
    # Kept under its own name so the integer `threads` above is not rebound.
    queue_threads = tf.train.start_queue_runners(coord=coord)
    try:
        step = 0
        while not coord.should_stop():
            step += 1
            feat = sess.run([batch_values])
            audio = feat[0][0]
            print ('Size of audio is ' + str(audio.size))
            script = feat[0][1]
            print ('Size of script is ' + str(script.size))
    except tf.errors.OutOfRangeError:
        # Apply the % operator so the step count is actually formatted in.
        print(' training for 1 epochs, %d steps' % step)
    finally:
        coord.request_stop()
        coord.join(queue_threads)
A sample dataset proves that there is an extra pair.
[[array([b'Text2', b'Text1'], dtype=object), array([[b'Translation-1', b'Translation-2'],
[b'Translation-1', b'Translation-2']], dtype=object)]]

Input query for python code

I have written the code below for my research, and I want to run it on many data files. I do not want to do that manually, i.e. by retyping lines of the code for every file. How can I use an input command in Python (I work with Python 2.7 on Windows) so that I only have to type the name of the desired data file? My code so far:
import iodata as io
import matplotlib.pyplot as plt
import numpy as np
import time
from scipy.signal import welch
from scipy import signal
# Convert one measurement file, then plot both channels and their Welch spectra.
testInstance = io.InputConverter()
start = time.time()
conversionError = io.ConversionError()
#data = testInstance.convert(r"S:\Doktorat\Python\", 1", conversionError)
data = testInstance.convert(r"/Users/PycharmProjects/Hugo/20160401", "201604010000", conversionError)
end = time.time()
print("time elapsed " + str(end - start))
if(conversionError.conversionSucces):
    print("Conversion succesful")
if(conversionError.conversionSucces == False):
    print("Conversion failed: " + conversionError.conversionErrorLog)
print("Done!")

# Create a new subplot for two cannals 1 & 3
a = np.amin(data.data)  # not used below
Bx = data.data[0,]
By = data.data[1,]
dt = float(300)/266350
Fs = 1/dt  # not used below
t = np.arange(0,300,dt*1e3)  # not used below
N = len(Bx)
M = len(By)
# NOTE: this rebinds `time` from the module to an array; time.time() is not
# available past this point.
time = np.linspace(0,300,N)
time2 = np.linspace(0,300,M)

# Read the fixed-size header of the raw file; it supplies the plot title.
filename = 'C:/Users/PycharmProjects/Hugo/20160401/201604010000.dat'
degree = u"\u00b0"
headersize = 64
with open(filename,'rb') as d:
    header = d.read(headersize)

ax1 = plt.subplot(211)
ax1.set_title(header[:16] + ', ' +                          # station name
    'Canals: '+header[32:33]+' and '+header[34:35]+ ', '      # canals
    +'Temp'+header[38:43]+degree+'C'                           # temperature
    +', '+'Time:'+header[26:32]+', '+'Date'+' '+header[16:26])      # date
plt.ylabel('Pico Tesle [pT]')
plt.xlabel('Time [ms]')
plt.grid()
plt.plot(time[51:-14], Bx[51:-14], label='Canal 1', color='r', linewidth=0.1, linestyle="-")
plt.plot(time2[1:-14], By[1:-14], label='Canal 3', color='b', linewidth=0.1, linestyle="-")
plt.legend(loc='upper right', frameon=False, )

# Create a new subplot for FFT
plt.subplot(212)
plt.title('Fast Fourier Transform')
plt.ylabel('Power [a.u.]')
plt.xlabel('Frequency Hz')
xaxis2 = np.arange(0,470,10)
plt.xticks(xaxis2)
fft1 = (Bx[51:-14])
fft2 = (By[1:-14])
plt.grid()

# Float division: under Python 2, 266336/300 truncates to 887 and skews the
# frequency axis.
sampling_rate = 266336 / 300.0

# Welch power spectrum for each channel
dataset = np.asarray(fft1)
freqs, psd = welch(dataset, fs=sampling_rate, window='hamming', nperseg=8192)
plt.semilogy(freqs, psd, color='r')

dataset2 = np.asarray(fft2)
freqs2, psd2 = welch(dataset2, fs=sampling_rate, window='hamming', nperseg=8192)
plt.semilogy(freqs2, psd2, color='b')
plt.show()
As you can see there are some places where it would be better to put input and when I run the code I can write names of filenames etc. to python instead of creating every single pythonfile, with specified info in the code.
Btw. I use Pycharm to my python.
If all you are trying to do is get rid of the hardcoded pathname, you should be able to format your name string with input variables
# Ask for the two variable path components instead of hard-coding them
# (raw_input is the Python 2 spelling of input).
path_template = "C:/Users/PycharmProjects/{0}/{1}"
name = raw_input("Name: ")
measurement = raw_input("Measurement: ")
filename = path_template.format(name, measurement)
see raw_input and string formatting

PyAudio recorder script IOError: [Errno Input overflowed] -9981

I use the code below to record audio until the "Enter" key is pressed, but it raises an exception:
import pyaudio
import wave
import curses
from time import gmtime, strftime
import sys, select, os
# Name of sub-directory where WAVE files are placed
# Record from the default input device until Enter is pressed, then write the
# captured frames to a timestamped WAVE file next to this script.
current_experiment_path = os.path.dirname(os.path.realpath(__file__))
subdir_recording = '/recording/'
# print current_experiment_path + subdir_recording
# Variables for Pyaudio
chunk = 1024
format = pyaudio.paInt16
channels = 2
rate = 48000
# Set variable for the labelling of the recorded WAVE file.
# NOTE(review): the timestamp contains ':' characters, which Windows does not
# allow in file names -- relevant only if this is ever run there.
timestamp = strftime("%Y-%m-%d-%H:%M:%S", gmtime())
#wave_output_filename = '%s.wav' % self.get('subject_nr')
wave_output_filename = '%s.wav' % timestamp
print current_experiment_path + subdir_recording + wave_output_filename
# pyaudio recording stuff
p = pyaudio.PyAudio()
stream = p.open(format = format,
channels = channels,
rate = rate,
input = True,
frames_per_buffer = chunk)
print "* recording"
# Create an empty list for audio recording
frames = []
# Record audio until Enter is pressed
# NOTE(review): the indentation of this loop was lost, so it is not possible to
# tell from here whether stream.read(chunk) / frames.append(data) run on every
# pass or only once Enter has been pressed. In either reading, each pass first
# spawns a shell command to clear the screen and prints two lines before any
# read happens; presumably that delay lets the input buffer overflow -- confirm.
i = 0
while True:
os.system('cls' if os.name == 'nt' else 'clear')
print "Recording Audio. Press Enter to stop recording and save file in " + wave_output_filename
print i
# Non-blocking check (timeout 0) for pending input on stdin
if sys.stdin in select.select([sys.stdin], [], [], 0)[0]:
line = raw_input()
# Record data audio data
data = stream.read(chunk)
# Add the data to a buffer (a list of chunks)
frames.append(data)
break
i += 1
print("* done recording")
# Close the audio recording stream
stream.stop_stream()
stream.close()
p.terminate()
# write data to WAVE file
wf = wave.open(current_experiment_path + subdir_recording + wave_output_filename, 'wb')
wf.setnchannels(channels)
# NOTE(review): p is queried here after p.terminate() was called above --
# confirm this is still valid, or fetch the sample size before terminating.
wf.setsampwidth(p.get_sample_size(format))
wf.setframerate(rate)
wf.writeframes(''.join(frames))
wf.close()
The exception produced is this
Recording Audio. Press Enter to stop recording and save file in 2015-11-20-22:15:38.wav
925
Traceback (most recent call last):
File "audio-record-timestamp.py", line 51, in <module>
data = stream.read(chunk)
File "/Library/Python/2.7/site-packages/pyaudio.py", line 605, in read
return pa.read_stream(self._stream, num_frames)
IOError: [Errno Input overflowed] -9981
What is producing the exception? I tried changing the chunk size (512,256,8192) it doesn't work. Changed the while loop condition and it didn't work.
I had a similar problem; there are 3 ways to solve it (that I could find)
set rate=24000
add option "exception_on_overflow=False" to the "read()" call, that is, make it "stream.read(chunk, exception_on_overflow=False)"
use callbacks
Here is, for your convenience, an example of using callbacks:
#!/usr/bin/python
import sys, os, math, time, pyaudio
try:
    import numpy
except ImportError:
    # numpy is optional: without it the callback omits the std-dev readout.
    numpy = None
rate=48000
chan=2
sofar=0  # running total of bytes received by the callback
p = pyaudio.PyAudio()


def callback(in_data, frame_count, time_info, status):
    """Stream callback: count the received bytes and print a status line.

    Args:
        in_data: Raw bytes of the captured audio buffer.
        frame_count: Number of frames in the buffer (unused).
        time_info: Timing information supplied by PyAudio (unused).
        status: Status flags supplied by PyAudio (unused).

    Returns:
        (None, pyaudio.paContinue): no output data, keep the stream running.
    """
    global sofar
    sofar += len(in_data)
    if numpy:
        # frombuffer replaces the deprecated binary mode of numpy.fromstring.
        f = numpy.frombuffer(in_data, dtype=numpy.int16)
        sys.stderr.write('length %6d sofar %6d std %4.1f \r' % \
                         (len(in_data), sofar, numpy.std(f)))
    else:
        sys.stderr.write('length %6d sofar %6d \r' % \
                         (len(in_data), sofar))
    data = None
    return (data, pyaudio.paContinue)


stream = p.open(format=pyaudio.paInt16, channels=chan, rate=rate,
                input=True, stream_callback=callback)
try:
    # The callback is driven by PyAudio; the main thread only has to stay alive.
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    # Ctrl-C is the intended way to stop the example.
    pass
finally:
    stream.stop_stream()
    stream.close()
    p.terminate()

Convert sample point to time values

import sys
import serial
import numpy as np
import matplotlib.pyplot as plt
from collections import deque
# Serial connection settings
port = "COM11"
baud = 9600
timeout=1

# Configure the port object first; it is opened explicitly further down.
ser = serial.Serial()
ser.port = port
ser.baudrate = baud
ser.timeout = timeout

# Fixed-length window of the 100 most recent values, newest on the left.
a1 = deque([0.0]*100)
#ax = plt.axes(xlim=(0, 100), ylim=(0, 1000))
line, = plt.plot(a1)
plt.ion()
plt.ylim([0,1000])

try:
    ser.open()
except:
    sys.stderr.write("Error opening serial port %s\n" % (ser.portstr) )
    sys.exit(1)

#ser.setRtsCts(0)
while True:
    # Block for the first byte, then drain whatever else is already waiting.
    data = ser.read(1) + ser.read(ser.inWaiting())
    #sys.stdout.write(data)
    print(a1)
    # Push the new reading in on the left and drop the oldest on the right.
    a1.appendleft((data))
    datatoplot = a1.pop()
    line.set_ydata(a1)
    plt.draw()
I am getting a plot of serial port values against sample points. I want to plot the serial port values against time instead. Is there a way to convert sample points to time values, similar to how sample points are converted to frequency values with freqs = scipy.fftpack.fftfreq(n, d)?
Thanks
If you want to plot the data against time from the start of the program, then:
import time
# Reference instant; all x values are seconds elapsed since this point.
t0 = time.time()
# Time window, the same length as the data window; NaN marks "no sample yet".
tlist = deque([np.nan] * 100)
while 1:
    # read the serial data ...
    # when you have read a sample, capture the time difference
    # and put it into a queue (similarly to the data values)
    deltat = time.time() - t0
    # The queue is `tlist` throughout; the earlier `dlist` name was undefined.
    tlist.appendleft((deltat))
    # remember to pop the data, as well
    tlist.pop()
    a1.pop()
    # set the x and y data
    line.set_xdata(tlist)
    line.set_ydata(a1)
    # draw it
    plt.draw()
Now you have the number of seconds from the start of the program on the X axis.
If you want to have the real time shown, then use datetime.datetime objects:
import datetime
# Wall-clock time window, pre-filled with the current time.
dlist = deque([datetime.datetime.now()] * 100)
while 1:
    # capture the serial data ...
    # `dlist` is the queue defined above (the earlier `dilst` was a typo).
    dlist.appendleft((datetime.datetime.now()))
    # everything else as above
This should give you a plot with real time on the X axis.
import sys
import serial
import numpy as np
import matplotlib.pyplot as plt
import time
from collections import deque
from scipy import arange
# Plot serial readings against seconds elapsed since the start of the program.
port = "COM13"
baud = 9600
timeout=1
ser = serial.Serial()
ser.port = port
ser.baudrate = baud
ser.timeout = timeout
t0=time.time()
# Time and value windows of equal length, newest sample on the left.
tlist = deque([np.nan]*10)
a1 = deque([0.0]*10)
#ax = plt.axes(xlim=(0, 100), ylim=(0, 1000))
line, = plt.plot(a1)
plt.ion()
plt.ylim([-100,100])
# Pass the on/off flag positionally: the keyword was renamed from `b` to
# `visible` in newer matplotlib releases.
plt.grid(True,which= 'major' , color= 'g' , linestyle= '--')
#plt.grid(b=True,which= 'minor' , color= '-m' , linestyle= '--')
ax = plt.gca()
try:
    ser.open()
except:
    sys.stderr.write("Error opening serial port %s\n" % (ser.portstr) )
    sys.exit(1)
#ser.setRtsCts(0)
while 1:
    # Read from serial port, blocking
    data = ser.read(1)
    # If there is more than 1 byte, read the rest
    n = ser.inWaiting()
    data = data + ser.read(n)
    #sys.stdout.write(data)
    #print(a1)
    #data1=int(data)-128
    deltat = time.time() - t0
    # `deltat` is the value computed above (`deltat1` was never defined).
    tlist.appendleft((deltat))
    datatoplot = tlist.pop()
    a1.appendleft((data))
    datatoplot = a1.pop()
    line.set_xdata(tlist)
    line.set_ydata(a1)
    # set_xdata does not move the axis limits; recompute the x range so the
    # time values become visible. The y range stays at the fixed ylim above.
    ax.relim()
    ax.autoscale_view(scalex=True, scaley=False)
    # plt.hold(False) was dropped here: hold() no longer exists in current
    # matplotlib and nothing in this loop adds new artists.
    plt.draw()
This is the complete code I used, and yes I had already changed that line.pop . But as I explained earlier in the comment I am not able to get the time values in x axis