use webscraping to extract Tesla revenue data - python-requests-html

AttributeError:'dataframe' object has no attribute to'Tesla Revenue'
this is what i tried and the output

Hi try this code:
#3
tesla_tables = soup.find_all("table")
for index, table in enumerate(tesla_tables):
if("Tesla Quarterly Revenue" in str(table)):
tesla_table_index = index
tesla_revenue = pd.DataFrame(columns=["Date", "Revenue"])
for row in tesla_tables[tesla_table_index].tbody.find_all("tr"):
col = row.find_all("td")
date = col[0].text
revenue = col[1].text
tesla_revenue = tesla_revenue.append({"Date": date,
"Revenue": revenue}, ignore_index = True)
tesla_revenue["Revenue"] = tesla_revenue['Revenue'].str.replace(',|\$',"")
tesla_revenue.dropna(inplace=True)
tesla_revenue = tesla_revenue[tesla_revenue['Revenue'] != ""]
tesla_revenue.tail()

Related

Django queryset from raw SQL

I want an equivalent of this sql query in Django
SELECT Gender, ServCode
FROM [openimisproductTestDb_16_08_22].[dbo].[tblInsuree]
JOIN [openimisproductTestDb_16_08_22].[dbo].[tblServices] ON [openimisproductTestDb_16_08_22].[dbo].[tblInsuree].AuditUserID = [openimisproductTestDb_16_08_22].[dbo].[tblServices].AuditUserID
WHERE Gender = 'F'
AND ServCode = 'F4'
What I have tried:
def assisted_birth_with_cs_query(user, **kwargs):
date_from = kwargs.get("date_from")
date_to = kwargs.get("date_to")
hflocation = kwargs.get("hflocation")
format = "%Y-%m-%d"
date_from_object = datetime.datetime.strptime(date_from, format)
date_from_str = date_from_object.strftime("%d/%m/%Y")
date_to_object = datetime.datetime.strptime(date_to, format)
date_to_str = date_to_object.strftime("%d/%m/%Y")
dictBase = {
"dateFrom": date_from_str,
"dateTo": date_to_str,
}
dictGeo = {}
if hflocation and hflocation!="0" :
hflocationObj = HealthFacility.objects.filter(
code=hflocation,
validity_to__isnull=True
).first()
dictBase["fosa"] = hflocationObj.name
claimItem = Insuree.objects.filter(
validity_from__gte = date_from,
validity_to__lte = date_to,
**dictGeo,
gender = 'F'
).count()
data = Service.objects.filter(code = 'F4').count() | Insuree.objects.filter(gender = 'F').count()
dictGeo['health_facility'] = hflocationObj.id
dictBase["post"]= str(data)
return dictBase
I tried like that but the one just adds when I want the women included in the insured table and the F4 code contained in the service table. both tables have the auditUserID column in common
It would be great if you could add the models to better see the relations between Insuree and Service. Assuming it's a 1-M, I'd go with this query:
Service.objects.filter(code='F4', insuree__gender='F').count()

Updating database based on previous csv file uploads - delete - create - or update Python/Dajngo

Please need help with the following
I am trying to update database in comparison to previous uploaded csv file. I need to update all fields except the vin if it changes (vin is the unique value), delete the item if it is no longer in the csv file and create one if one is new
vin. stock_no make model trim miles
12345789098765432 4535 honda civic lx 89000
j4j4jj49098765432 3453 toyota corolla DX 54555
12345345438765432 6254 ford mustang es 101299
When I change any value and the csv is uploaded it makes a duplicate:
def upload_file__view(request):
form = form(request.POST or None, request.FILES or
None)
company = Comp_info.objects.last()
if form.is_valid():
form.save()
obj = c.objects.get(activated=False)
with open(obj.file_name.path, 'r+') as f:
reader = c.reader(f)
for i, row in enumerate(reader):
if i==0:
pass
else:
# row = "".join(row)
# row = row.replace(",", " ")
# row = row.split()
print(row)
print(type(row))
vin = row[0].upper()
condition = row[1].replace("U", "Used").replace("N", "New")
stock_no = row[2]
year = int(row[5])
make = row[3]
model = row[4]
trim = row[6]
mileage = row[8]
mpg_city = row[18]
mpg_hwy = row[19]
engine = row[9]
transmission = row[12]
fuel_type = row[11]
vehicle_type = row[7]
drive_type = row[20].replace("4X2", "2WD").replace("4X4", "4WD")
exterior_color = row[15]
interior_color = row[16]
price = row[13].replace("0", "")
description = row[22]
features_2 = row[21]
images_data = row[23]
raw_images_list = images_data.split(',')
images_list = [""] * 25
for x in range(image_count):
if x == 25:
break
images_list[x] = raw_images_list[x]
for x in images_list:
print(x)
if images_list[0] == "":
images_list[0] = "https://www.beverlyhillscarclub.com/template/images/ina_f.jpg"
car_photo = images_list[0]
car_photo_1 = images_list[1]
car_photo_2 = images_list[2]
car_photo_3 = images_list[3]
car_photo_4 = images_list[4]
car_photo_5 = images_list[5]
car_photo_6 = images_list[6]
car_photo_7 = images_list[7]
car_photo_8 = images_list[8]
car_photo_9 = images_list[9]
car_photo_10 = images_list[10]
car_photo_11 = images_list[11]
car_photo_12 = images_list[12]
car_photo_13 = images_list[13]
car_photo_14 = images_list[14]
car_photo_15 = images_list[15]
car_photo_16 = images_list[16]
car_photo_17 = images_list[17]
car_photo_18 = images_list[18]
car_photo_19 = images_list[19]
car_photo_20 = images_list[20]
car_photo_21 = images_list[21]
car_photo_22 = images_list[22]
car_photo_23 = images_list[23]
car_photo_24 = images_list[24]
# notes = pip._vendor.requests(images_list[0], stream=True)
#car_photo = row[23]
# user = User.objects.get(username=row[3])
Cars.objects.update_or_create(
vin = vin,
condition = condition,
stock_no = stock_no,
year = year,
make = make,
model = model,
trim = trim,
mileage = mileage,
mpg_city = mpg_city,
engine = engine,
transmission = transmission,
fuel_type = fuel_type,
vehicle_type = vehicle_type,
drive_type = drive_type,
exterior_color = exterior_color,
interior_color = interior_color,
price = price,
description = description,
company_name = company.company_name,
address = company.company_address,
city = company.city,
state = company.state,
zip = company.zip_code,
phone_number = company.phone_number,
email = company.fax_number,
features_2 = features_2,
car_photo = downloadFile(car_photo),
car_photo_1 = downloadFile(car_photo_1),
car_photo_2 = downloadFile(car_photo_2),
car_photo_3 = downloadFile(car_photo_3),
car_photo_4 = downloadFile(car_photo_4),
car_photo_5 = downloadFile(car_photo_5),
car_photo_6 = downloadFile(car_photo_6),
car_photo_7 = downloadFile(car_photo_7),
car_photo_8 = downloadFile(car_photo_8),
car_photo_9 = downloadFile(car_photo_9),
car_photo_10 = downloadFile(car_photo_10),
car_photo_11 = downloadFile(car_photo_11),
car_photo_12 = downloadFile(car_photo_12),
car_photo_13 = downloadFile(car_photo_13),
car_photo_14 = downloadFile(car_photo_14),
car_photo_15 = downloadFile(car_photo_15),
car_photo_16 = downloadFile(car_photo_16),
car_photo_17 = downloadFile(car_photo_17),
car_photo_18 = downloadFile(car_photo_18),
car_photo_19 = downloadFile(car_photo_19),
car_photo_20 = downloadFile(car_photo_20),
car_photo_21 = downloadFile(car_photo_21),
car_photo_22 = downloadFile(car_photo_22),
car_photo_23 = downloadFile(car_photo_23),
car_photo_24 = downloadFile(car_photo_24)
#car_photo = car_photo,
# quantity = int(row[2]),
# salesman = user
)
obj.activated = True
obj.save()
data = {
'form' : form,
'now' : now,
}
return render(request, 'uploads.html', data)
Thanks in advance for any help!
Thank you
Step 1
An empty list was created to compare with uploaded data:
imported_cars = []
Step 2
Created a filter of unique value (primary Key) and checked if it existed and used the method get to update items. Created car (item variable) to update or create ubject.
if Cars.objects.filter(vin=vin).exists():
car = Cars.objects.get(vin=vin)
Step 3
Used else statement to create item if it did not exist.
else:
car = Cars.objects.create(vin=vin, condition=condition...)
Last, out of the loop populated empty list with updated and created cars and deleted items that were in the database but not in the csv file.
imported_cars_vin_numbers = [car.vin for car in imported_cars]
for car in Cars.objects.all():
if car.vin not in imported_cars_vin_numbers:
car.delete()
Special thanks and credit to Zack Plauché who was extremely helpful and professional in helping me and teaching me how to solve this issue.
Your issue is in the model.py
you should write the Cars object with the following.
vin = models.CharField(primary_key=True, editable=False)
Confirm this works, since I am suggesting solution without actually seeing the model.py
This should handle the update aspect of your logic. The part where you delete a vin if its not in the CSV will have to be done with new process I don't see written here.But a suggestion would be to clear the DB and repopulate, or create function that compares DB with CSV and delete object if not in CSV.

Django filter using Q and multiple fields with different values

I am trying to generate a result that satisfies with the filter query below:
indicators = request.GET.getlist('indicators[]')
fmrprofiles = FMRPriority.objects.all()
q_objects = Q()
obj_filters = []
for indicator in indicators:
split_i = indicator.split('_')
if len(split_i) == 5:
if not any(d['indicator'] == split_i[1] for d in obj_filters):
obj_filters.append({
'indicator': split_i[1],
'scores': []
})
for o in obj_filters:
if split_i[1] == o['indicator']:
o['scores'].append(int(split_i[4]))
for obj in obj_filters:
print (obj['scores'])
q_objects.add(Q(pcindicator__id = int(obj['indicator'])) & Q(score__in=obj['scores']), Q.AND)
print (q_objects)
fmrprofiles = fmrprofiles.values('fmr__id','fmr__road_name').filter(q_objects).order_by('-fmr__date_validated')
print (fmrprofiles.query)
Basically, indicators is a list e.g. ['indicator_1_scoring_1_5', 'indicator_1_scoring_1_4', 'indicator_2_scoring_2_5']
I wanted to filter FMRPriority with these following fields:
pcindicator
score
e.g. pcindicator is equal 1 and scores selected are 5,4..another selection pcindicator is equal to 2 and scores selected are 3.
The query q_objects.add(Q(pcindicator__id = int(obj['indicator'])) & Q(score__in=obj['scores']), Q.AND) returns empty set..i have tried also the raw sql, same result.
Model:
class FMRPriority(models.Model):
fmr = models.ForeignKey(FMRProfile, verbose_name=_("FMR Project"), on_delete=models.CASCADE)
pcindicator = models.ForeignKey(PCIndicator, verbose_name=_("Priority Indicator"), on_delete=models.PROTECT)
score = models.FloatField(_("Score"))
I solve this by using OR and count the occurrence of id then exclude those are not equal to the length of filters:
for obj in obj_filters:
print (obj['scores'])
q_objects.add(
(Q(fmrpriority__pcindicator__id = int(obj['indicator'])) & Q(fmrpriority__score__in=obj['scores'])), Q.OR
)
fmrprofiles = fmrprofiles.values(*vals_to_display).filter(q_objects).annotate(
num_ins=Count('id'),
...
)).exclude(
~Q(num_ins = len(obj_filters))
).order_by('rank','road_name')

Get different result when use i iteration and np.sum function

I have a pandas Dataframe, I want to get the sum of the 'daily return' column of the data frame for every 60 days. The following is my code:
day = days() #days is a function to count business days.
for day>60:
for i in range(day-60,day):
current_x = sh600004['daily return'][i]
x_list.append(current_x)
x_sum = sum(x_list)
print x_sum
Here's what I got
To test the result, I used the following code:
y = sh600004
y.apply(lambda x: x.sum())
and I got different result.
The sum of 'daily return' column is not the same. When I print out my dataframe sh600004, I realize the data in x_sum is same as sh600004['daily return'], not the sum of it.
What do I need to do to get the sum of every 60 days of the daily return ? Can anyone help, please?
I don't know is that gonna help or not, but here's my the code I wrote so far:
#calculate daily return
daily_close = sh600004['close']
daily_pct_c = daily_close.pct_change().fillna(0)
sh600004['daily return'] = daily_pct_c
def days():
day = np.busday_count((datetime.datetime.strptime(sh600004['date'][0], '%Y/%m/%d')),pd.to_datetime(date), weekmask='1111100', holidays=holiday_list)
return day
def xn_deviation():
x_list = []
deviation_list = []
z_list = []
diff_list = []
result_list = []
day = days()
for i in range(0, 60):
current_x = sh600004['daily return'][i]
x_list.append(current_x)
x_sum = sum(x_list)
x_average = x_sum/len(x_list) #xn average
x_deviation = current_x - x_average #xn deviation
deviation_list.append(x_deviation)
dev_sum = sum(deviation_list) #calculate Z
z_list.append(dev_sum) #deviation sum list
r = max(z_list)-min(z_list) #calculate widest deviation
diff = np.square(current_x - x_average)
diff_list.append(diff)
sum_diff = sum(diff_list)
s = np.sqrt(sum_diff/len(x_list))
result_list = [r,s]
return result_list
else:
for i in range(day-60,day):
#same code as before
#loop
for date in sh600004.index:
days()
xn_deviation()

MSChart - Multiple columns grouped by date

I need to display % utilization for several production lines over several days. So my Y axis will be the % values. I need bars for each of the production lines on each of the days. So my X axis would be groups of columns each labeled for the production line and then grouped and labeled for the date. How would I do this with the MSchart.
Below is a sample of what I need. It shows only 2 production lines (I will need to display more than 2 production lines) and does not include the production line name in the X-axis label.
I am almost there. Here is an image of the chart I am creating:
And here is the code that created it:
Private Sub ChartSetup()
Try
dvCapacityUtilization.RowFilter = ""
dvCapacityUtilization.Sort = "Period ASC, CutUpSet ASC"
Me.cuChart.BeginInit()
With Me.cuChart
.ChartAreas(0).AxisX.Interval = 30
.ChartAreas(0).AxisX.LabelStyle.Format = "MM/yy"
.ChartAreas(0).AxisX.LabelStyle.Angle = -90
.ChartAreas(0).AxisY.MajorGrid.LineColor = Color.Gray
.ChartAreas(0).AxisX.MajorGrid.LineColor = Color.White
.ChartAreas(0).AxisX.MinorGrid.LineColor = Color.White
.DataBindCrossTable(dvCapacityUtilization, _
"CutUpSet", "Period", "CapacityUtilization", "Label = CapacityUtilization")
End With
'
For Each series In Me.cuChart.Series
series.IsValueShownAsLabel = False
series.LabelFormat = "0.0%"
series.SetCustomProperty("PointWidth", "0.5")
series.SetCustomProperty("DrawingStyle", "Cylinder")
series.XValueType = DataVisualization.Charting.ChartValueType.Date
Next
Me.cuChart.EndInit()
Catch ex As Exception
ErrHandler(Me.Name & " - Chart Setup", ex)
End Try
End Sub
How do I turn the value labels off? In my code I used IsValueShownAsLabel = False but they are still displayed. I eventually will allow the user to turn the values on or off.
How do I format the values as XX.X%. In my code I used LabelFormat = 0.0% but that did not work.
How can I get the X axis labels to be the dates associated with the data values. In my code I used AxisX.Interval = 30 just to get the labels on the chart.
Follow up:
As stated in #1 above, I used series.IsValueShownAsLabel = False but the value labels were still displayed. To remove them I had to do the following:
For Each point in series.Points
point.Label = String.Empty
Next
Why should I have to do this instead of using IsValueShownAsLabel = False?
WINFORM : Finally find out with some R&D. Please follow below steps.
1. Drag and Drop new chart control from ToolBox in new winform.
2. Remove default "Series1" as it's dynamically generated as per column values.
3. Assume that we have following sample datatable.
public DataTable GetDataTable()
{
DataTable dt = new DataTable();
dt.Columns.Add("UtilizationDate", typeof(DateTime));
dt.Columns.Add("ProductionLine", typeof(string));
dt.Columns.Add("UtilizationValue", typeof(int));
DataRow dr = dt.NewRow();
dr[0] = DateTime.Now;
dr[1] = "Proy01";
dr[2] = 25;
dt.Rows.Add(dr);
dr = dt.NewRow();
dr[0] = DateTime.Now;
dr[1] = "Proy02";
dr[2] = 15;
dt.Rows.Add(dr);
dr = dt.NewRow();
dr[0] = DateTime.Now;
dr[1] = "Proy03";
dr[2] = 125;
dt.Rows.Add(dr);
dr = dt.NewRow();
dr[0] = DateTime.Now.AddMonths(1);
dr[1] = "Proy01";
dr[2] = 13;
dt.Rows.Add(dr);
dr = dt.NewRow();
dr[0] = DateTime.Now.AddMonths(1);
dr[1] = "Proy02";
dr[2] = 111;
dt.Rows.Add(dr);
dr = dt.NewRow();
dr[0] = DateTime.Now.AddMonths(1);
dr[1] = "Proy03";
dr[2] = 77;
dt.Rows.Add(dr);
dr = dt.NewRow();
dr[0] = DateTime.Now.AddMonths(2);
dr[1] = "Proy01";
dr[2] = 13;
dt.Rows.Add(dr);
dr = dt.NewRow();
dr[0] = DateTime.Now.AddMonths(2);
dr[1] = "Proy02";
dr[2] = 111;
dt.Rows.Add(dr);
dr = dt.NewRow();
dr[0] = DateTime.Now.AddMonths(2);
dr[1] = "Proy03";
dr[2] = 77;
dt.Rows.Add(dr);
return dt;
}
4. Now add following line of code using DataBindCrossTable.
chart1.ChartAreas["ChartArea1"].AxisX.IntervalType = DateTimeIntervalType.Months;
chart1.ChartAreas["ChartArea1"].AxisX.Interval = 1;
chart1.ChartAreas[0].AxisX.LabelStyle.Format = "MM-yy";
DataTable dt = GetDataTable();
chart1.DataBindCrossTable(dt.DefaultView, "ProductionLine", "UtilizationDate", "UtilizationValue", "Label=UtilizationValue");
foreach (Series item in chart1.Series)
{
item.IsValueShownAsLabel = true;
item["PixelPointWidth"] = "50";
item["DrawingStyle"] = "Cylinder";
item.XValueType = ChartValueType.DateTime;
}