recording with pyaudio giving junk values even if MIC is muted or in pin drop silence - python-2.7

I am working on recording using pyaudio on windows. I am plotting recorded sound using matplotlib.
Recording length is 60 sec.
buffer size is 1024
What I am getting is: while recording, the first few iterations return junk values. After that it starts recording the actual sound.
I also found that, even if the MIC is muted, it still gives junk values when I plot it.
These junk values are affecting result of my computations.
Any idea why these junk values are getting recorded? Is there a quick solution?
Code:
class record(Thread):
    """Background capture thread.

    Records `duration` seconds of audio through PyAudio, publishes the
    decoded 16-bit samples into the shared ``ccare.val`` list (under
    ``self.lock``) and keeps the raw frames in ``self.out`` for saving.

    NOTE(review): the first few buffers after ``open()`` frequently carry
    sound-card warm-up garbage (the "junk values" described above) --
    consider reading and discarding a handful of buffers before the real
    capture starts.
    """

    def __init__(self):
        super(record, self).__init__()
        self.lock = threading.Lock()
        self.project = projectutils.getActiveProject()
        self.outfile = self.project['name'] + '.wav'
        self.tw = tool.getToolWindow()
        self.duration = 60              # total capture length, seconds
        self.buffer = 1024              # frames per read
        self.pin = pyaudio.PyAudio()
        self.channels = 2
        ccare.rate = self.rate = 8820   # sample rate shared with ccare
        self.format = pyaudio.paInt16
        self.inStream = self.pin.open(format=self.format,
                                      channels=self.channels,
                                      rate=self.rate,
                                      input=True,
                                      frames_per_buffer=self.buffer)
        self.flag = 1
        self.out = []
        # How many reads of `buffer` frames cover the whole duration.
        self.upper_lim = self.rate / self.buffer * self.duration

    def _publish(self, raw):
        """Decode one raw buffer to 16-bit ints and record it."""
        samples = list(struct.unpack("%dh" % (len(raw) / 2), raw))
        self.lock.acquire()
        ccare.val = ccare.val + samples
        self.lock.release()
        self.out.append(raw)

    def run(self):
        ccare.val = []
        if not self.inStream:
            return
        # The very first read is itself guarded by the lock (the loop
        # reads below are not) -- kept exactly as in the original.
        self.lock.acquire()
        raw = self.inStream.read(self.buffer)
        self.lock.release()
        self._publish(raw)
        # One buffer was consumed above, so upper_lim - 1 remain.
        for _ in xrange(1, self.upper_lim):
            if not self.inStream:
                break
            self._publish(self.inStream.read(self.buffer))
        if self.inStream:
            self.inStream.stop_stream()
            self.inStream.close()
            self.pin.terminate()
        self.save_file()
Simple Code:
import pyaudio
import wave
import struct
val = []


def record(out_file, warmup_chunks=0):
    """Record 60 seconds of audio and return the decoded samples.

    Parameters
    ----------
    out_file : str
        Destination file name.  NOTE(review): currently unused -- the
        original never wrote the file either; kept for interface
        compatibility (TODO: actually write the wav here).
    warmup_chunks : int, optional
        Number of initial buffers to read and throw away before the real
        capture begins.  Sound cards commonly deliver garbage in the
        first buffers after the stream opens (even with the mic muted),
        which is exactly the "junk values" problem described above;
        passing e.g. 5-10 here discards that warm-up noise.  The default
        of 0 preserves the original behaviour.

    Returns
    -------
    list of int
        The 16-bit samples (empty if the stream could not be opened).
        The original built this list and silently discarded it.
    """
    duration = 60                  # record for 60 seconds
    buffer = 1024                  # frames per read
    pin = pyaudio.PyAudio()
    channels = 2
    rate = 8820
    sample_format = pyaudio.paInt16   # renamed: `format` shadows a builtin
    inStream = pin.open(format=sample_format, channels=channels, rate=rate,
                        input=True, frames_per_buffer=buffer)
    out = []
    # Number of reads of `buffer` frames needed to cover the duration.
    upper_lim = rate / buffer * duration
    val = []
    if not inStream:
        return val
    # Discard warm-up buffers so start-up junk never reaches the result.
    for _ in xrange(warmup_chunks):
        inStream.read(buffer)
    for _ in xrange(upper_lim):
        data = inStream.read(buffer)
        val += list(struct.unpack("%dh" % (len(data) / 2), data))
        out.append(data)
    inStream.stop_stream()
    inStream.close()
    pin.terminate()
    return val
The values stored in 'val' variable will be plotted in different thread using matplotlib.

Related

cv2.VideoCapture freeze framing when using a countdown timer with pressed key 's' for rock paper scissors project

I have defined two different functions, one for getting the prediction of the user using the Teachable Machine from Google and then a countdown function which is initiated by the 'S' key within the loop.
My issue is that when I click the 'S' key my Video Capture freeze frames and begins the countdown, when all I want it to be doing is allowing the user to ready themselves for creating one of three gestures.
The code is below:
def get_user_choice(self):
    """Show the webcam feed until the user presses 's', then return the
    gesture the model predicted on the most recent frame."""
    choice = ''
    while True:
        ret, frame = cap.read()
        small = cv2.resize(frame, (224, 224), interpolation=cv2.INTER_AREA)
        pixels = np.array(small)
        # Teachable Machine expects pixel values normalised to [-1, 1].
        data[0] = (pixels.astype(np.float32) / 127.0) - 1
        prediction = model.predict(data, verbose=0)
        cv2.imshow('frame', frame)
        key = cv2.waitKey(1)
        if key == ord('s'):
            # NOTE(review): countdown() blocks this loop, so the preview
            # freezes until it returns -- this is the freeze-frame issue
            # described above.
            countdown()
            choice = user_options[np.argmax(prediction[0])]
            print('You chose:', choice)
            return choice
def countdown(seconds=3):
    """Print a countdown (seconds, ..., 1), one number per second.

    Fixes over the original:
    - the original busy-waited in a tight loop (100% CPU); ``time.sleep(1)``
      yields the CPU instead,
    - the original initialised ``prev = time.time() + 1``, so the first
      number only appeared after roughly two seconds,
    - the original counted 3, 2, 1, 0 -- a spurious trailing 0 from an
      off-by-one between the loop condition and the decrement.

    Parameters
    ----------
    seconds : int, optional
        How many numbers to count down (default 3, matching the numbers
        the original actually printed).

    NOTE: this still BLOCKS the caller while it runs; to keep a video
    preview live during the countdown, track elapsed time inside the
    camera loop instead of calling a blocking helper.
    """
    print('Get your answer ready!')
    for remaining in range(seconds, 0, -1):
        print(remaining)
        time.sleep(1)
It works almost perfectly apart from the countdown function which has created a freeze frame issue.
I've tried to mess around with the countdown function, and put the videocapture into the countdown function. Nothing seems to have worked for me so far, and thus is causing frustration.

Pytorch loading saved weights but not working

I have this code. I save weights after every epoch and the code saves it. But when I load the weights the loss value starts from the initial loss value which means the loading is failing somehow.
# NOTE(review): this runs on every start and always begins from the
# ImageNet-pretrained weights; only the checkpoint loaded inside train()
# actually restores previous progress (see the answer below).
net = torchvision.models.resnet18(pretrained=True)
num_ftrs = net.fc.in_features
# Replace the classifier head with 136 outputs -- reshaped to (-1, 68, 2)
# inside train(), i.e. 68 (x, y) coordinate pairs.
net.fc = nn.Linear(num_ftrs, 136)
def train():
    """Train the module-level `net` on `trainloader`, checkpointing every
    epoch to 'face2.txt'.

    The checkpoint restored at start-up contains both the model and the
    optimizer state; without that restore, training restarts from the
    pretrained weights (the problem described above).
    Raises IOError/FileNotFoundError if 'face2.txt' does not exist yet.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    criterion = L1Loss(reduction='sum')
    lr = 0.0000001
    optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=0.0005)
    net.to(device)
    # Restore previous progress (model + optimizer).
    state = torch.load('face2.txt')
    net.load_state_dict(state['state_dict'])
    optimizer.load_state_dict(state['optimizer'])
    for epoch in range(200000):
        # BUG FIX: `running_loss` was never initialised in the original,
        # so the first `running_loss += ...` raised NameError.
        running_loss = 0.0
        for batch, data in enumerate(trainloader, 0):
            torch.cuda.empty_cache()
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            # 136 head outputs -> 68 (x, y) pairs.
            outputs = net(inputs).reshape(-1, 68, 2)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        # Checkpoint once per epoch, after the batch loop.
        state = {
            'epoch': epoch,
            'state_dict': net.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        torch.save(state, 'face2.txt')


if __name__ == '__main__':
    train()
The initial loss is 50k plus and after some thousand epochs the loss becomes 50-60. Now when I re-run the code I expect it to start from a near loss value but it starts from around 50k again.
The code you wrote:
net = torchvision.models.resnet18(pretrained=True)
means you start over again with the same net - pretrained resnet18. Instead, you should load the last state if present, this would fix your problem.
I will slightly update your notation:
state = {
'epoch': epoch,
'model_state_dict': net.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
...
}
torch.save(state, filepath)
Learnable parameters are the first state_dict (model state dict).
The second state_dict is the optimizer state dict. You recall that the optimizer is used to improve our learnable parameters. But the optimizer state_dict is fixed. Nothing to learn in there.
Your code at some point should look like:
model.load_state_dict(state['model_state_dict'])
optimizer.load_state_dict(state['optimizer_state_dict'])

Using Python Tkinter .config() method

I am trying to use the Python Tkinter .config() method to update some message text. I can't get it to work. What might I be doing wrong (see the update_message method):
#!/usr/bin/python
import alsaaudio as aa
import audioop
import Tkinter as tk
import tkFont
import threading
import Queue
# styles
BACKROUND_COLOR = '#000000'   # NOTE(review): name is missing the G in BACKGROUND
TYPEFACE = 'Unit-Bold'
FONT_SIZE = 50
TEXT_COLOR = '#777777'
TEXTBOX_WIDTH = 400
# text
TITLE = 'listen closely'
SCORE_MESSAGE = 'your score:\n '
END_MESSAGE = 'too loud!\ntry again'
# configuration
# NOTE(review): setup_audio() below hard-codes 2 channels, 44100 Hz and
# S16_LE instead of using CHANNELS / SAMPLE_RATE / FORMAT -- confirm which
# set of values is intended.
DEVICE = 'hw:1' # hardware sound card index
CHANNELS = 1
SAMPLE_RATE = 8000 # Hz // 44100
PERIOD = 256 # Frames // 256
FORMAT = aa.PCM_FORMAT_S8 # Sound format
NOISE_THRESHOLD = 3
class Display(object):
def __init__(self, parent, queue):
self.parent = parent
self.queue = queue
self._geom = '200x200+0+0'
parent.geometry("{0}x{1}+0+0".format(
parent.winfo_screenwidth(), parent.winfo_screenheight()))
parent.overrideredirect(1)
parent.title(TITLE)
parent.configure(background=BACKROUND_COLOR)
parent.displayFont = tkFont.Font(family=TYPEFACE, size=FONT_SIZE)
self.process_queue()
def process_queue(self):
try:
score = self.queue.get(0)
self.print_message(score)
except Queue.Empty:
pass
self.parent.after(100, self.update_queue)
def update_queue(self):
try:
score = self.queue.get(0)
self.update_message(score)
except Queue.Empty:
pass
self.parent.after(100, self.update_queue)
def print_message(self, messageString):
print 'message', messageString
displayString = SCORE_MESSAGE + str(messageString)
self.message = tk.Message(
self.parent, text=displayString, bg=BACKROUND_COLOR,
font=self.parent.displayFont, fg=TEXT_COLOR, width=TEXTBOX_WIDTH, justify="c")
self.message.place(relx=.5, rely=.5, anchor="c")
def update_message(self, messageString):
print 'message', messageString
displayString = SCORE_MESSAGE + str(messageString)
self.message.config(text=displayString)
def setup_audio(queue, stop_event):
data_in = aa.PCM(aa.PCM_CAPTURE, aa.PCM_NONBLOCK, 'hw:1')
data_in.setchannels(2)
data_in.setrate(44100)
data_in.setformat(aa.PCM_FORMAT_S16_LE)
data_in.setperiodsize(256)
while not stop_event.is_set():
# Read data from device
l, data = data_in.read()
if l:
# catch frame error
try:
max_vol = audioop.rms(data, 2)
scaled_vol = max_vol // 4680
print scaled_vol
if scaled_vol <= 3:
# Too quiet, ignore
continue
queue.put(scaled_vol)
except audioop.error, e:
if e.message != "not a whole number of frames":
raise e
def main():
    """Wire up the GUI and the audio-capture thread, then run Tk."""
    root = tk.Tk()
    scores = Queue.Queue()
    Display(root, scores)
    stop_event = threading.Event()
    audio_thread = threading.Thread(target=setup_audio,
                                    args=[scores, stop_event])
    audio_thread.start()
    try:
        root.mainloop()
    finally:
        # Always shut down the capture thread, even if mainloop raises.
        stop_event.set()
        audio_thread.join()


if __name__ == '__main__':
    main()
I don't want to be laying down a new message every time I update. If the .config() doesn't work, is there another method to update the text configuration of the message?
I would use string variables. First create your string variable and set it to what you want displayed at the start; next create your widget and pass the string variable as its text source. Then, whenever you want to change the text in the widget, just change the string variable.
self.messaget = StringVar()
self.messaget.set("")
# BUG FIX: create the widget and place it in TWO steps. The original
# chained `tk.Message(...).grid()`, and since .grid() returns None,
# self.message ended up as None instead of the widget.
self.message = tk.Message(
    self.parent, textvariable=self.messaget, bg=BACKROUND_COLOR,
    font=self.parent.displayFont, fg=TEXT_COLOR,
    width=TEXTBOX_WIDTH, justify="c")
self.message.grid()
# Remember to place the widget after creating it, using either
# .grid(row=..., column=...) or .pack().
# Note that it is `textvariable` instead of `text`; with `text` it will
# still run, but it will show PY_Var... instead of the variable's value.
edit
to change the text without recreating the object you do the name of the string variable you have used and .set
self.messaget.set("hi")

Pyaudio : how to compress audio stream

I'm currently developping a VOIP tool in python working as a client-server as follows :
CHUNK = 1024  # frames read (and sent) per iteration

p = pyaudio.PyAudio()
# Mono 16-bit capture stream at 44.1 kHz.
stream = p.open(format=pyaudio.paInt16,
                channels=1,
                rate=44100,
                input=True,
                frames_per_buffer=CHUNK)

# Forward raw microphone chunks to the server forever.
while 1:
    connection.sendVoice(stream.read(CHUNK))
How could I proceed to compress the sent data to spare connection, maybe increase speed, ...
import time, sys,io
import pymedia.audio.sound as sound
import pymedia.audio.acodec as acodec
import pymedia.muxer as muxer
def voiceRecorder( secs, name ):
f = open(name,'wb')
secs = secs*5
dm= muxer.Demuxer('mp3')
snds= sound.getODevices()
rt = 44100
cparams= { 'id': acodec.getCodecID( 'mp3' ),
'bitrate': 128000/4,
'sample_rate': rt,
'channels': 2 }
ac= acodec.Encoder( cparams )
snd= sound.Input( rt, 2, sound.AFMT_S16_LE )
snd.start()
start_time = time.time()
while snd.getPosition()<= secs:
s= snd.getData()
if s and len( s ):
for fr in ac.encode( s ):
f.write( fr)
else:
time.sleep(.25)
snd.stop()
if __name__ == "__main__":
if len( sys.argv )!= 3:
print 'Usage: voice_recorder <seconds> <file_name>'
else:
voiceRecorder( int( sys.argv[ 1 ] ), sys.argv[ 2 ] )
By my calculations you are sending a little more than 2KB — to be exact, 2.0480KB — of audio to your socket connection on every loop:
(16 * 44.1 / 8) * 1024/44100 = 2.0480
One simple way to send less data is to reduce the chunk size: if you change the chunk to 512, you are sending just a bit more than 1KB.
Another way is to use the zlib package in Python to compress the stream.read data before sending it through your socket — but don't forget to decompress it on the other side.

gstreamer : audiosink to output stream of integers representing volume levels

I need a gstreamer audio sink that outputs integers that
represent volume level of an audio stream. The sampling rate
need not be the same as the incoming audio stream, it can be much
lower, ex.: one value per second would be sufficient.
Does such a sink exist ?
It seems that this one could be modified to do this :
http://gstreamer.freedesktop.org/data/doc/gstreamer/head/gst-plugins-base-plugins/html/gst-plugins-base-plugins-volume.html
But if something already exists I'd prefer to avoid writing one !
there indeed is such an element, it's not a sink though but I don't think you need it to be for that task anyway :)
It is called level (http://gstreamer.freedesktop.org/data/doc/gstreamer/head/gst-plugins-good-plugins/html/gst-plugins-good-plugins-level.html), and as you can see there is an "interval" property that you can tweak.
We use this element in our video editor to draw waveforms, here take this simplified script :
from gi.repository import Gst
from gi.repository import GLib
import sys
mainloop = GLib.MainLoop()
def _messageCb(bus, message):
if str(type(message.src)) == "<class '__main__.__main__.GstLevel'>":
s = message.get_structure()
p = None
if s:
p = s.get_value("rms")
if p:
st = s.get_value("stream-time")
print "rms = " + str(p) + "; stream-time = " + str(st)
if message.type == Gst.MessageType.EOS:
mainloop.quit()
elif message.type == Gst.MessageType.ERROR:
bus.disconnect_by_func(_messageCb)
mainloop.quit()
if __name__ == "__main__":
    Gst.init([])
    # Decode the URI given on the command line, run it through a `level`
    # element posting messages every 10 ms, and discard the audio.
    launch = ("uridecodebin name=decode uri=" + sys.argv[1] +
              " ! audioconvert ! level name=wavelevel interval=10000000"
              " post-messages=true ! fakesink qos=false name=faked")
    pipeline = Gst.parse_launch(launch)
    faked = pipeline.get_by_name("faked")
    bus = pipeline.get_bus()
    bus.add_signal_watch()
    bus.connect("message", _messageCb)
    pipeline.set_state(Gst.State.PLAYING)
    mainloop.run()
    pipeline.set_state(Gst.State.NULL)
May I inquire about your use case ?